1 #include "log.h" 2 #include "stat_cache.h" 3 #include "fdevent.h" 4 #include "etag.h" 5 6 #include <sys/types.h> 7 #include <sys/stat.h> 8 9 #include <stdlib.h> 10 #include <string.h> 11 #include <errno.h> 12 #include <unistd.h> 13 #include <stdio.h> 14 #include <fcntl.h> 15 #include <assert.h> 16 17 #ifdef HAVE_ATTR_ATTRIBUTES_H 18 # include <attr/attributes.h> 19 #endif 20 21 #ifdef HAVE_SYS_EXTATTR_H 22 # include <sys/extattr.h> 23 #endif 24 25 #ifdef HAVE_FAM_H 26 # include <fam.h> 27 #endif 28 29 #include "sys-mmap.h" 30 31 /* NetBSD 1.3.x needs it */ 32 #ifndef MAP_FAILED 33 # define MAP_FAILED -1 34 #endif 35 36 #ifndef O_LARGEFILE 37 # define O_LARGEFILE 0 38 #endif 39 40 #ifndef HAVE_LSTAT 41 # define lstat stat 42 #endif 43 44 #if 0 45 /* enables debug code for testing if all nodes in the stat-cache as accessable */ 46 #define DEBUG_STAT_CACHE 47 #endif 48 49 /* 50 * stat-cache 51 * 52 * we cache the stat() calls in our own storage 53 * the directories are cached in FAM 54 * 55 * if we get a change-event from FAM, we increment the version in the FAM->dir mapping 56 * 57 * if the stat()-cache is queried we check if the version id for the directory is the 58 * same and return immediatly. 59 * 60 * 61 * What we need: 62 * 63 * - for each stat-cache entry we need a fast indirect lookup on the directory name 64 * - for each FAMRequest we have to find the version in the directory cache (index as userdata) 65 * 66 * stat <<-> directory <-> FAMRequest 67 * 68 * if file is deleted, directory is dirty, file is rechecked ... 69 * if directory is deleted, directory mapping is removed 70 * 71 * */ 72 73 #ifdef HAVE_FAM_H 74 typedef struct { 75 FAMRequest *req; 76 77 buffer *name; 78 79 int version; 80 } fam_dir_entry; 81 #endif 82 83 /* the directory name is too long to always compare on it 84 * - we need a hash 85 * - the hash-key is used as sorting criteria for a tree 86 * - a splay-tree is used as we can use the caching effect of it 87 */ 88 89 /* we want to cleanup the stat-cache every few seconds, let's say 10 90 * 91 * - remove entries which are outdated since 30s 92 * - remove entries which are fresh but havn't been used since 60s 93 * - if we don't have a stat-cache entry for a directory, release it from the monitor 94 */ 95 96 #ifdef DEBUG_STAT_CACHE 97 typedef struct { 98 int *ptr; 99 100 size_t used; 101 size_t size; 102 } fake_keys; 103 104 static fake_keys ctrl; 105 #endif 106 107 stat_cache *stat_cache_init(void) { 108 stat_cache *sc = NULL; 109 110 sc = calloc(1, sizeof(*sc)); 111 112 sc->dir_name = buffer_init(); 113 sc->hash_key = buffer_init(); 114 115 #ifdef HAVE_FAM_H 116 sc->fam_fcce_ndx = -1; 117 #endif 118 119 #ifdef DEBUG_STAT_CACHE 120 ctrl.size = 0; 121 #endif 122 123 return sc; 124 } 125 126 static stat_cache_entry * stat_cache_entry_init(void) { 127 stat_cache_entry *sce = NULL; 128 129 sce = calloc(1, sizeof(*sce)); 130 131 sce->name = buffer_init(); 132 sce->etag = buffer_init(); 133 sce->content_type = buffer_init(); 134 135 return sce; 136 } 137 138 static void stat_cache_entry_free(void *data) { 139 stat_cache_entry *sce = data; 140 if (!sce) return; 141 142 buffer_free(sce->etag); 143 buffer_free(sce->name); 144 buffer_free(sce->content_type); 145 146 free(sce); 147 } 148 149 #ifdef HAVE_FAM_H 150 static fam_dir_entry * fam_dir_entry_init(void) { 151 fam_dir_entry *fam_dir = NULL; 152 153 fam_dir = calloc(1, sizeof(*fam_dir)); 154 155 fam_dir->name = buffer_init(); 156 157 return fam_dir; 158 } 159 160 static void fam_dir_entry_free(FAMConnection *fc, void *data) { 161 fam_dir_entry *fam_dir = data; 162 163 if (!fam_dir) return; 164 165 FAMCancelMonitor(fc, fam_dir->req); 166 167 buffer_free(fam_dir->name); 168 free(fam_dir->req); 169 170 free(fam_dir); 171 } 172 #endif 173 174 void stat_cache_free(stat_cache *sc) { 175 while (sc->files) { 176 int osize; 177 splay_tree *node = sc->files; 178 179 osize = sc->files->size; 180 181 stat_cache_entry_free(node->data); 182 sc->files = splaytree_delete(sc->files, node->key); 183 184 force_assert(osize - 1 == splaytree_size(sc->files)); 185 } 186 187 buffer_free(sc->dir_name); 188 buffer_free(sc->hash_key); 189 190 #ifdef HAVE_FAM_H 191 while (sc->dirs) { 192 int osize; 193 splay_tree *node = sc->dirs; 194 195 osize = sc->dirs->size; 196 197 fam_dir_entry_free(&sc->fam, node->data); 198 sc->dirs = splaytree_delete(sc->dirs, node->key); 199 200 if (osize == 1) { 201 force_assert(NULL == sc->dirs); 202 } else { 203 force_assert(osize == (sc->dirs->size + 1)); 204 } 205 } 206 207 if (-1 != sc->fam_fcce_ndx) { 208 /* fd events already gone */ 209 sc->fam_fcce_ndx = -1; 210 211 FAMClose(&sc->fam); 212 } 213 #endif 214 free(sc); 215 } 216 217 #if defined(HAVE_XATTR) 218 static int stat_cache_attr_get(buffer *buf, char *name) { 219 int attrlen; 220 int ret; 221 222 buffer_string_prepare_copy(buf, 1023); 223 attrlen = buf->size - 1; 224 if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) { 225 buffer_commit(buf, attrlen); 226 } 227 return ret; 228 } 229 #elif defined(HAVE_EXTATTR) 230 static int stat_cache_attr_get(buffer *buf, char *name) { 231 ssize_t attrlen; 232 233 buffer_string_prepare_copy(buf, 1023); 234 235 if (-1 != (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, "Content-Type", buf->ptr, buf->size - 1))) { 236 buf->used = attrlen + 1; 237 buf->ptr[attrlen] = '\0'; 238 return 0; 239 } 240 return -1; 241 } 242 #endif 243 244 /* the famous DJB hash function for strings */ 245 static uint32_t hashme(buffer *str) { 246 uint32_t hash = 5381; 247 const char *s; 248 for (s = str->ptr; *s; s++) { 249 hash = ((hash << 5) + hash) + *s; 250 } 251 252 hash &= ~(((uint32_t)1) << 31); /* strip the highest bit */ 253 254 return hash; 255 } 256 257 #ifdef HAVE_FAM_H 258 handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent) { 259 size_t i; 260 stat_cache *sc = srv->stat_cache; 261 size_t events; 262 263 UNUSED(_fce); 264 /* */ 265 266 if (revent & FDEVENT_IN) { 267 events = FAMPending(&sc->fam); 268 269 for (i = 0; i < events; i++) { 270 FAMEvent fe; 271 fam_dir_entry *fam_dir; 272 splay_tree *node; 273 int ndx, j; 274 275 FAMNextEvent(&sc->fam, &fe); 276 277 /* handle event */ 278 279 switch(fe.code) { 280 case FAMChanged: 281 case FAMDeleted: 282 case FAMMoved: 283 /* if the filename is a directory remove the entry */ 284 285 fam_dir = fe.userdata; 286 fam_dir->version++; 287 288 /* file/dir is still here */ 289 if (fe.code == FAMChanged) break; 290 291 /* we have 2 versions, follow and no-follow-symlink */ 292 293 for (j = 0; j < 2; j++) { 294 buffer_copy_string(sc->hash_key, fe.filename); 295 buffer_append_int(sc->hash_key, j); 296 297 ndx = hashme(sc->hash_key); 298 299 sc->dirs = splaytree_splay(sc->dirs, ndx); 300 node = sc->dirs; 301 302 if (node && (node->key == ndx)) { 303 int osize = splaytree_size(sc->dirs); 304 305 fam_dir_entry_free(&sc->fam, node->data); 306 sc->dirs = splaytree_delete(sc->dirs, ndx); 307 308 force_assert(osize - 1 == splaytree_size(sc->dirs)); 309 } 310 } 311 break; 312 default: 313 break; 314 } 315 } 316 } 317 318 if (revent & FDEVENT_HUP) { 319 /* fam closed the connection */ 320 fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(&sc->fam)); 321 fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(&sc->fam)); 322 323 FAMClose(&sc->fam); 324 } 325 326 return HANDLER_GO_ON; 327 } 328 329 static int buffer_copy_dirname(buffer *dst, buffer *file) { 330 size_t i; 331 332 if (buffer_string_is_empty(file)) return -1; 333 334 for (i = buffer_string_length(file); i > 0; i--) { 335 if (file->ptr[i] == '/') { 336 buffer_copy_string_len(dst, file->ptr, i); 337 return 0; 338 } 339 } 340 341 return -1; 342 } 343 #endif 344 345 #ifdef HAVE_LSTAT 346 static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) { 347 if (lstat(dname->ptr, lst) == 0) { 348 return S_ISLNK(lst->st_mode) ? 0 : 1; 349 } 350 else { 351 log_error_write(srv, __FILE__, __LINE__, "sbs", 352 "lstat failed for:", 353 dname, strerror(errno)); 354 }; 355 return -1; 356 } 357 #endif 358 359 /*** 360 * 361 * 362 * 363 * returns: 364 * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file) 365 * - HANDLER_ERROR on stat() failed -> see errno for problem 366 */ 367 368 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) { 369 #ifdef HAVE_FAM_H 370 fam_dir_entry *fam_dir = NULL; 371 int dir_ndx = -1; 372 #endif 373 stat_cache_entry *sce = NULL; 374 stat_cache *sc; 375 struct stat st; 376 size_t k; 377 int fd; 378 struct stat lst; 379 #ifdef DEBUG_STAT_CACHE 380 size_t i; 381 #endif 382 383 int file_ndx; 384 385 *ret_sce = NULL; 386 387 /* 388 * check if the directory for this file has changed 389 */ 390 391 sc = srv->stat_cache; 392 393 buffer_copy_buffer(sc->hash_key, name); 394 buffer_append_int(sc->hash_key, con->conf.follow_symlink); 395 396 file_ndx = hashme(sc->hash_key); 397 sc->files = splaytree_splay(sc->files, file_ndx); 398 399 #ifdef DEBUG_STAT_CACHE 400 for (i = 0; i < ctrl.used; i++) { 401 if (ctrl.ptr[i] == file_ndx) break; 402 } 403 #endif 404 405 if (sc->files && (sc->files->key == file_ndx)) { 406 #ifdef DEBUG_STAT_CACHE 407 /* it was in the cache */ 408 force_assert(i < ctrl.used); 409 #endif 410 411 /* we have seen this file already and 412 * don't stat() it again in the same second */ 413 414 sce = sc->files->data; 415 416 /* check if the name is the same, we might have a collision */ 417 418 if (buffer_is_equal(name, sce->name)) { 419 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) { 420 if (sce->stat_ts == srv->cur_ts) { 421 *ret_sce = sce; 422 return HANDLER_GO_ON; 423 } 424 } 425 } else { 426 /* collision, forget about the entry */ 427 sce = NULL; 428 } 429 } else { 430 #ifdef DEBUG_STAT_CACHE 431 if (i != ctrl.used) { 432 log_error_write(srv, __FILE__, __LINE__, "xSB", 433 file_ndx, "was already inserted but not found in cache, ", name); 434 } 435 force_assert(i == ctrl.used); 436 #endif 437 } 438 439 #ifdef HAVE_FAM_H 440 /* dir-check */ 441 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) { 442 if (0 != buffer_copy_dirname(sc->dir_name, name)) { 443 log_error_write(srv, __FILE__, __LINE__, "sb", 444 "no '/' found in filename:", name); 445 return HANDLER_ERROR; 446 } 447 448 buffer_copy_buffer(sc->hash_key, sc->dir_name); 449 buffer_append_int(sc->hash_key, con->conf.follow_symlink); 450 451 dir_ndx = hashme(sc->hash_key); 452 453 sc->dirs = splaytree_splay(sc->dirs, dir_ndx); 454 455 if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) { 456 fam_dir = sc->dirs->data; 457 458 /* check whether we got a collision */ 459 if (buffer_is_equal(sc->dir_name, fam_dir->name)) { 460 /* test whether a found file cache entry is still ok */ 461 if ((NULL != sce) && (fam_dir->version == sce->dir_version)) { 462 /* the stat()-cache entry is still ok */ 463 464 *ret_sce = sce; 465 return HANDLER_GO_ON; 466 } 467 } else { 468 /* hash collision, forget about the entry */ 469 fam_dir = NULL; 470 } 471 } 472 } 473 #endif 474 475 /* 476 * *lol* 477 * - open() + fstat() on a named-pipe results in a (intended) hang. 478 * - stat() if regular file + open() to see if we can read from it is better 479 * 480 * */ 481 if (-1 == stat(name->ptr, &st)) { 482 return HANDLER_ERROR; 483 } 484 485 486 if (S_ISREG(st.st_mode)) { 487 /* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */ 488 if (name->ptr[buffer_string_length(name) - 1] == '/') { 489 errno = ENOTDIR; 490 return HANDLER_ERROR; 491 } 492 493 /* try to open the file to check if we can read it */ 494 if (-1 == (fd = open(name->ptr, O_RDONLY))) { 495 return HANDLER_ERROR; 496 } 497 close(fd); 498 } 499 500 if (NULL == sce) { 501 502 sce = stat_cache_entry_init(); 503 buffer_copy_buffer(sce->name, name); 504 505 /* already splayed file_ndx */ 506 if ((NULL != sc->files) && (sc->files->key == file_ndx)) { 507 /* hash collision: replace old entry */ 508 stat_cache_entry_free(sc->files->data); 509 sc->files->data = sce; 510 } else { 511 int osize = splaytree_size(sc->files); 512 513 sc->files = splaytree_insert(sc->files, file_ndx, sce); 514 force_assert(osize + 1 == splaytree_size(sc->files)); 515 516 #ifdef DEBUG_STAT_CACHE 517 if (ctrl.size == 0) { 518 ctrl.size = 16; 519 ctrl.used = 0; 520 ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr)); 521 } else if (ctrl.size == ctrl.used) { 522 ctrl.size += 16; 523 ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr)); 524 } 525 526 ctrl.ptr[ctrl.used++] = file_ndx; 527 #endif 528 } 529 force_assert(sc->files); 530 force_assert(sc->files->data == sce); 531 } 532 533 sce->st = st; 534 sce->stat_ts = srv->cur_ts; 535 536 /* catch the obvious symlinks 537 * 538 * this is not a secure check as we still have a race-condition between 539 * the stat() and the open. We can only solve this by 540 * 1. open() the file 541 * 2. fstat() the fd 542 * 543 * and keeping the file open for the rest of the time. But this can 544 * only be done at network level. 545 * 546 * per default it is not a symlink 547 * */ 548 #ifdef HAVE_LSTAT 549 sce->is_symlink = 0; 550 551 /* we want to only check for symlinks if we should block symlinks. 552 */ 553 if (!con->conf.follow_symlink) { 554 if (stat_cache_lstat(srv, name, &lst) == 0) { 555 #ifdef DEBUG_STAT_CACHE 556 log_error_write(srv, __FILE__, __LINE__, "sb", 557 "found symlink", name); 558 #endif 559 sce->is_symlink = 1; 560 } 561 562 /* 563 * we assume "/" can not be symlink, so 564 * skip the symlink stuff if our path is / 565 **/ 566 else if (buffer_string_length(name) > 1) { 567 buffer *dname; 568 char *s_cur; 569 570 dname = buffer_init(); 571 buffer_copy_buffer(dname, name); 572 573 while ((s_cur = strrchr(dname->ptr, '/'))) { 574 buffer_string_set_length(dname, s_cur - dname->ptr); 575 if (dname->ptr == s_cur) { 576 #ifdef DEBUG_STAT_CACHE 577 log_error_write(srv, __FILE__, __LINE__, "s", "reached /"); 578 #endif 579 break; 580 } 581 #ifdef DEBUG_STAT_CACHE 582 log_error_write(srv, __FILE__, __LINE__, "sbs", 583 "checking if", dname, "is a symlink"); 584 #endif 585 if (stat_cache_lstat(srv, dname, &lst) == 0) { 586 sce->is_symlink = 1; 587 #ifdef DEBUG_STAT_CACHE 588 log_error_write(srv, __FILE__, __LINE__, "sb", 589 "found symlink", dname); 590 #endif 591 break; 592 }; 593 }; 594 buffer_free(dname); 595 }; 596 }; 597 #endif 598 599 if (S_ISREG(st.st_mode)) { 600 /* determine mimetype */ 601 buffer_reset(sce->content_type); 602 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR) 603 if (con->conf.use_xattr) { 604 stat_cache_attr_get(sce->content_type, name->ptr); 605 } 606 #endif 607 /* xattr did not set a content-type. ask the config */ 608 if (buffer_string_is_empty(sce->content_type)) { 609 size_t namelen = buffer_string_length(name); 610 611 for (k = 0; k < con->conf.mimetypes->used; k++) { 612 data_string *ds = (data_string *)con->conf.mimetypes->data[k]; 613 buffer *type = ds->key; 614 size_t typelen = buffer_string_length(type); 615 616 if (buffer_is_empty(type)) continue; 617 618 /* check if the right side is the same */ 619 if (typelen > namelen) continue; 620 621 if (0 == strncasecmp(name->ptr + namelen - typelen, type->ptr, typelen)) { 622 buffer_copy_buffer(sce->content_type, ds->value); 623 break; 624 } 625 } 626 } 627 etag_create(sce->etag, &(sce->st), con->etag_flags); 628 } else if (S_ISDIR(st.st_mode)) { 629 etag_create(sce->etag, &(sce->st), con->etag_flags); 630 } 631 632 #ifdef HAVE_FAM_H 633 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) { 634 /* is this directory already registered ? */ 635 if (NULL == fam_dir) { 636 fam_dir = fam_dir_entry_init(); 637 638 buffer_copy_buffer(fam_dir->name, sc->dir_name); 639 640 fam_dir->version = 1; 641 642 fam_dir->req = calloc(1, sizeof(FAMRequest)); 643 644 if (0 != FAMMonitorDirectory(&sc->fam, fam_dir->name->ptr, 645 fam_dir->req, fam_dir)) { 646 647 log_error_write(srv, __FILE__, __LINE__, "sbsbs", 648 "monitoring dir failed:", 649 fam_dir->name, 650 "file:", name, 651 FamErrlist[FAMErrno]); 652 653 fam_dir_entry_free(&sc->fam, fam_dir); 654 fam_dir = NULL; 655 } else { 656 int osize = splaytree_size(sc->dirs); 657 658 /* already splayed dir_ndx */ 659 if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) { 660 /* hash collision: replace old entry */ 661 fam_dir_entry_free(&sc->fam, sc->dirs->data); 662 sc->dirs->data = fam_dir; 663 } else { 664 sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir); 665 force_assert(osize == (splaytree_size(sc->dirs) - 1)); 666 } 667 668 force_assert(sc->dirs); 669 force_assert(sc->dirs->data == fam_dir); 670 } 671 } 672 673 /* bind the fam_fc to the stat() cache entry */ 674 675 if (fam_dir) { 676 sce->dir_version = fam_dir->version; 677 } 678 } 679 #endif 680 681 *ret_sce = sce; 682 683 return HANDLER_GO_ON; 684 } 685 686 /** 687 * remove stat() from cache which havn't been stat()ed for 688 * more than 10 seconds 689 * 690 * 691 * walk though the stat-cache, collect the ids which are too old 692 * and remove them in a second loop 693 */ 694 695 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) { 696 stat_cache_entry *sce; 697 698 if (!t) return 0; 699 700 stat_cache_tag_old_entries(srv, t->left, keys, ndx); 701 stat_cache_tag_old_entries(srv, t->right, keys, ndx); 702 703 sce = t->data; 704 705 if (srv->cur_ts - sce->stat_ts > 2) { 706 keys[(*ndx)++] = t->key; 707 } 708 709 return 0; 710 } 711 712 int stat_cache_trigger_cleanup(server *srv) { 713 stat_cache *sc; 714 size_t max_ndx = 0, i; 715 int *keys; 716 717 sc = srv->stat_cache; 718 719 if (!sc->files) return 0; 720 721 keys = calloc(1, sizeof(int) * sc->files->size); 722 723 stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx); 724 725 for (i = 0; i < max_ndx; i++) { 726 int ndx = keys[i]; 727 splay_tree *node; 728 729 sc->files = splaytree_splay(sc->files, ndx); 730 731 node = sc->files; 732 733 if (node && (node->key == ndx)) { 734 #ifdef DEBUG_STAT_CACHE 735 size_t j; 736 int osize = splaytree_size(sc->files); 737 stat_cache_entry *sce = node->data; 738 #endif 739 stat_cache_entry_free(node->data); 740 sc->files = splaytree_delete(sc->files, ndx); 741 742 #ifdef DEBUG_STAT_CACHE 743 for (j = 0; j < ctrl.used; j++) { 744 if (ctrl.ptr[j] == ndx) { 745 ctrl.ptr[j] = ctrl.ptr[--ctrl.used]; 746 break; 747 } 748 } 749 750 force_assert(osize - 1 == splaytree_size(sc->files)); 751 #endif 752 } 753 } 754 755 free(keys); 756 757 return 0; 758 } 759