xref: /lighttpd1.4/src/stat_cache.c (revision e6925949)
1 #include "log.h"
2 #include "stat_cache.h"
3 #include "fdevent.h"
4 #include "etag.h"
5 
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <unistd.h>
13 #include <stdio.h>
14 #include <fcntl.h>
15 #include <assert.h>
16 
17 #ifdef HAVE_ATTR_ATTRIBUTES_H
18 # include <attr/attributes.h>
19 #endif
20 
21 #ifdef HAVE_SYS_EXTATTR_H
22 # include <sys/extattr.h>
23 #endif
24 
25 #ifdef HAVE_FAM_H
26 # include <fam.h>
27 #endif
28 
29 #include "sys-mmap.h"
30 
31 /* NetBSD 1.3.x needs it */
32 #ifndef MAP_FAILED
33 # define MAP_FAILED -1
34 #endif
35 
36 #ifndef O_LARGEFILE
37 # define O_LARGEFILE 0
38 #endif
39 
40 #ifndef HAVE_LSTAT
41 # define lstat stat
42 #endif
43 
44 #if 0
45 /* enables debug code for testing if all nodes in the stat-cache as accessable */
46 #define DEBUG_STAT_CACHE
47 #endif
48 
49 /*
50  * stat-cache
51  *
52  * we cache the stat() calls in our own storage
53  * the directories are cached in FAM
54  *
55  * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
56  *
57  * if the stat()-cache is queried we check if the version id for the directory is the
58  * same and return immediatly.
59  *
60  *
61  * What we need:
62  *
63  * - for each stat-cache entry we need a fast indirect lookup on the directory name
64  * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
65  *
66  * stat <<-> directory <-> FAMRequest
67  *
68  * if file is deleted, directory is dirty, file is rechecked ...
69  * if directory is deleted, directory mapping is removed
70  *
71  * */
72 
73 #ifdef HAVE_FAM_H
74 typedef struct {
75 	FAMRequest *req;
76 
77 	buffer *name;
78 
79 	int version;
80 } fam_dir_entry;
81 #endif
82 
83 /* the directory name is too long to always compare on it
84  * - we need a hash
85  * - the hash-key is used as sorting criteria for a tree
86  * - a splay-tree is used as we can use the caching effect of it
87  */
88 
89 /* we want to cleanup the stat-cache every few seconds, let's say 10
90  *
91  * - remove entries which are outdated since 30s
92  * - remove entries which are fresh but havn't been used since 60s
93  * - if we don't have a stat-cache entry for a directory, release it from the monitor
94  */
95 
96 #ifdef DEBUG_STAT_CACHE
97 typedef struct {
98 	int *ptr;
99 
100 	size_t used;
101 	size_t size;
102 } fake_keys;
103 
104 static fake_keys ctrl;
105 #endif
106 
107 stat_cache *stat_cache_init(void) {
108 	stat_cache *sc = NULL;
109 
110 	sc = calloc(1, sizeof(*sc));
111 
112 	sc->dir_name = buffer_init();
113 	sc->hash_key = buffer_init();
114 
115 #ifdef HAVE_FAM_H
116 	sc->fam_fcce_ndx = -1;
117 #endif
118 
119 #ifdef DEBUG_STAT_CACHE
120 	ctrl.size = 0;
121 #endif
122 
123 	return sc;
124 }
125 
126 static stat_cache_entry * stat_cache_entry_init(void) {
127 	stat_cache_entry *sce = NULL;
128 
129 	sce = calloc(1, sizeof(*sce));
130 
131 	sce->name = buffer_init();
132 	sce->etag = buffer_init();
133 	sce->content_type = buffer_init();
134 
135 	return sce;
136 }
137 
138 static void stat_cache_entry_free(void *data) {
139 	stat_cache_entry *sce = data;
140 	if (!sce) return;
141 
142 	buffer_free(sce->etag);
143 	buffer_free(sce->name);
144 	buffer_free(sce->content_type);
145 
146 	free(sce);
147 }
148 
149 #ifdef HAVE_FAM_H
150 static fam_dir_entry * fam_dir_entry_init(void) {
151 	fam_dir_entry *fam_dir = NULL;
152 
153 	fam_dir = calloc(1, sizeof(*fam_dir));
154 
155 	fam_dir->name = buffer_init();
156 
157 	return fam_dir;
158 }
159 
160 static void fam_dir_entry_free(FAMConnection *fc, void *data) {
161 	fam_dir_entry *fam_dir = data;
162 
163 	if (!fam_dir) return;
164 
165 	FAMCancelMonitor(fc, fam_dir->req);
166 
167 	buffer_free(fam_dir->name);
168 	free(fam_dir->req);
169 
170 	free(fam_dir);
171 }
172 #endif
173 
174 void stat_cache_free(stat_cache *sc) {
175 	while (sc->files) {
176 		int osize;
177 		splay_tree *node = sc->files;
178 
179 		osize = sc->files->size;
180 
181 		stat_cache_entry_free(node->data);
182 		sc->files = splaytree_delete(sc->files, node->key);
183 
184 		force_assert(osize - 1 == splaytree_size(sc->files));
185 	}
186 
187 	buffer_free(sc->dir_name);
188 	buffer_free(sc->hash_key);
189 
190 #ifdef HAVE_FAM_H
191 	while (sc->dirs) {
192 		int osize;
193 		splay_tree *node = sc->dirs;
194 
195 		osize = sc->dirs->size;
196 
197 		fam_dir_entry_free(&sc->fam, node->data);
198 		sc->dirs = splaytree_delete(sc->dirs, node->key);
199 
200 		if (osize == 1) {
201 			force_assert(NULL == sc->dirs);
202 		} else {
203 			force_assert(osize == (sc->dirs->size + 1));
204 		}
205 	}
206 
207 	if (-1 != sc->fam_fcce_ndx) {
208 		/* fd events already gone */
209 		sc->fam_fcce_ndx = -1;
210 
211 		FAMClose(&sc->fam);
212 	}
213 #endif
214 	free(sc);
215 }
216 
217 #if defined(HAVE_XATTR)
218 static int stat_cache_attr_get(buffer *buf, char *name) {
219 	int attrlen;
220 	int ret;
221 
222 	buffer_string_prepare_copy(buf, 1023);
223 	attrlen = buf->size - 1;
224 	if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
225 		buffer_commit(buf, attrlen);
226 	}
227 	return ret;
228 }
229 #elif defined(HAVE_EXTATTR)
230 static int stat_cache_attr_get(buffer *buf, char *name) {
231 	ssize_t attrlen;
232 
233 	buffer_prepare_copy(buf, 1023);
234 
235 	if (-1 != (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, "Content-Type", buf->ptr, buf->size - 1))) {
236 		buf->used = attrlen + 1;
237 		buf->ptr[attrlen] = '\0';
238 		return 0;
239 	}
240 	return -1;
241 }
242 #endif
243 
244 /* the famous DJB hash function for strings */
245 static uint32_t hashme(buffer *str) {
246 	uint32_t hash = 5381;
247 	const char *s;
248 	for (s = str->ptr; *s; s++) {
249 		hash = ((hash << 5) + hash) + *s;
250 	}
251 
252 	hash &= ~(1 << 31); /* strip the highest bit */
253 
254 	return hash;
255 }
256 
257 #ifdef HAVE_FAM_H
258 handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent) {
259 	size_t i;
260 	stat_cache *sc = srv->stat_cache;
261 	size_t events;
262 
263 	UNUSED(_fce);
264 	/* */
265 
266 	if (revent & FDEVENT_IN) {
267 		events = FAMPending(&sc->fam);
268 
269 		for (i = 0; i < events; i++) {
270 			FAMEvent fe;
271 			fam_dir_entry *fam_dir;
272 			splay_tree *node;
273 			int ndx, j;
274 
275 			FAMNextEvent(&sc->fam, &fe);
276 
277 			/* handle event */
278 
279 			switch(fe.code) {
280 			case FAMChanged:
281 			case FAMDeleted:
282 			case FAMMoved:
283 				/* if the filename is a directory remove the entry */
284 
285 				fam_dir = fe.userdata;
286 				fam_dir->version++;
287 
288 				/* file/dir is still here */
289 				if (fe.code == FAMChanged) break;
290 
291 				/* we have 2 versions, follow and no-follow-symlink */
292 
293 				for (j = 0; j < 2; j++) {
294 					buffer_copy_string(sc->hash_key, fe.filename);
295 					buffer_append_int(sc->hash_key, j);
296 
297 					ndx = hashme(sc->hash_key);
298 
299 					sc->dirs = splaytree_splay(sc->dirs, ndx);
300 					node = sc->dirs;
301 
302 					if (node && (node->key == ndx)) {
303 						int osize = splaytree_size(sc->dirs);
304 
305 						fam_dir_entry_free(&sc->fam, node->data);
306 						sc->dirs = splaytree_delete(sc->dirs, ndx);
307 
308 						force_assert(osize - 1 == splaytree_size(sc->dirs));
309 					}
310 				}
311 				break;
312 			default:
313 				break;
314 			}
315 		}
316 	}
317 
318 	if (revent & FDEVENT_HUP) {
319 		/* fam closed the connection */
320 		fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(&sc->fam));
321 		fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(&sc->fam));
322 
323 		FAMClose(&sc->fam);
324 	}
325 
326 	return HANDLER_GO_ON;
327 }
328 
329 static int buffer_copy_dirname(buffer *dst, buffer *file) {
330 	size_t i;
331 
332 	if (buffer_string_is_empty(file)) return -1;
333 
334 	for (i = buffer_string_length(file); i > 0; i--) {
335 		if (file->ptr[i] == '/') {
336 			buffer_copy_string_len(dst, file->ptr, i);
337 			return 0;
338 		}
339 	}
340 
341 	return -1;
342 }
343 #endif
344 
345 #ifdef HAVE_LSTAT
346 static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
347 	if (lstat(dname->ptr, lst) == 0) {
348 		return S_ISLNK(lst->st_mode) ? 0 : 1;
349 	}
350 	else {
351 		log_error_write(srv, __FILE__, __LINE__, "sbs",
352 				"lstat failed for:",
353 				dname, strerror(errno));
354 	};
355 	return -1;
356 }
357 #endif
358 
359 /***
360  *
361  *
362  *
363  * returns:
364  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
365  *  - HANDLER_ERROR on stat() failed -> see errno for problem
366  */
367 
368 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
369 #ifdef HAVE_FAM_H
370 	fam_dir_entry *fam_dir = NULL;
371 	int dir_ndx = -1;
372 	splay_tree *dir_node = NULL;
373 #endif
374 	stat_cache_entry *sce = NULL;
375 	stat_cache *sc;
376 	struct stat st;
377 	size_t k;
378 	int fd;
379 	struct stat lst;
380 #ifdef DEBUG_STAT_CACHE
381 	size_t i;
382 #endif
383 
384 	int file_ndx;
385 	splay_tree *file_node = NULL;
386 
387 	*ret_sce = NULL;
388 
389 	/*
390 	 * check if the directory for this file has changed
391 	 */
392 
393 	sc = srv->stat_cache;
394 
395 	buffer_copy_buffer(sc->hash_key, name);
396 	buffer_append_int(sc->hash_key, con->conf.follow_symlink);
397 
398 	file_ndx = hashme(sc->hash_key);
399 	sc->files = splaytree_splay(sc->files, file_ndx);
400 
401 #ifdef DEBUG_STAT_CACHE
402 	for (i = 0; i < ctrl.used; i++) {
403 		if (ctrl.ptr[i] == file_ndx) break;
404 	}
405 #endif
406 
407 	if (sc->files && (sc->files->key == file_ndx)) {
408 #ifdef DEBUG_STAT_CACHE
409 		/* it was in the cache */
410 		force_assert(i < ctrl.used);
411 #endif
412 
413 		/* we have seen this file already and
414 		 * don't stat() it again in the same second */
415 
416 		file_node = sc->files;
417 
418 		sce = file_node->data;
419 
420 		/* check if the name is the same, we might have a collision */
421 
422 		if (buffer_is_equal(name, sce->name)) {
423 			if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
424 				if (sce->stat_ts == srv->cur_ts) {
425 					*ret_sce = sce;
426 					return HANDLER_GO_ON;
427 				}
428 			}
429 		} else {
430 			/* oops, a collision,
431 			 *
432 			 * file_node is used by the FAM check below to see if we know this file
433 			 * and if we can save a stat().
434 			 *
435 			 * BUT, the sce is not reset here as the entry into the cache is ok, we
436 			 * it is just not pointing to our requested file.
437 			 *
438 			 *  */
439 
440 			file_node = NULL;
441 		}
442 	} else {
443 #ifdef DEBUG_STAT_CACHE
444 		if (i != ctrl.used) {
445 			log_error_write(srv, __FILE__, __LINE__, "xSB",
446 				file_ndx, "was already inserted but not found in cache, ", name);
447 		}
448 		force_assert(i == ctrl.used);
449 #endif
450 	}
451 
452 #ifdef HAVE_FAM_H
453 	/* dir-check */
454 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
455 		if (0 != buffer_copy_dirname(sc->dir_name, name)) {
456 			log_error_write(srv, __FILE__, __LINE__, "sb",
457 				"no '/' found in filename:", name);
458 			return HANDLER_ERROR;
459 		}
460 
461 		buffer_copy_buffer(sc->hash_key, sc->dir_name);
462 		buffer_append_int(sc->hash_key, con->conf.follow_symlink);
463 
464 		dir_ndx = hashme(sc->hash_key);
465 
466 		sc->dirs = splaytree_splay(sc->dirs, dir_ndx);
467 
468 		if (sc->dirs && (sc->dirs->key == dir_ndx)) {
469 			dir_node = sc->dirs;
470 		}
471 
472 		if (dir_node && file_node) {
473 			/* we found a file */
474 
475 			sce = file_node->data;
476 			fam_dir = dir_node->data;
477 
478 			if (fam_dir->version == sce->dir_version) {
479 				/* the stat()-cache entry is still ok */
480 
481 				*ret_sce = sce;
482 				return HANDLER_GO_ON;
483 			}
484 		}
485 	}
486 #endif
487 
488 	/*
489 	 * *lol*
490 	 * - open() + fstat() on a named-pipe results in a (intended) hang.
491 	 * - stat() if regular file + open() to see if we can read from it is better
492 	 *
493 	 * */
494 	if (-1 == stat(name->ptr, &st)) {
495 		return HANDLER_ERROR;
496 	}
497 
498 
499 	if (S_ISREG(st.st_mode)) {
500 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
501 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
502 			errno = ENOTDIR;
503 			return HANDLER_ERROR;
504 		}
505 
506 		/* try to open the file to check if we can read it */
507 		if (-1 == (fd = open(name->ptr, O_RDONLY))) {
508 			return HANDLER_ERROR;
509 		}
510 		close(fd);
511 	}
512 
513 	if (NULL == sce) {
514 #ifdef DEBUG_STAT_CACHE
515 		int osize = splaytree_size(sc->files);
516 #endif
517 
518 		sce = stat_cache_entry_init();
519 		buffer_copy_buffer(sce->name, name);
520 
521 		sc->files = splaytree_insert(sc->files, file_ndx, sce);
522 #ifdef DEBUG_STAT_CACHE
523 		if (ctrl.size == 0) {
524 			ctrl.size = 16;
525 			ctrl.used = 0;
526 			ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
527 		} else if (ctrl.size == ctrl.used) {
528 			ctrl.size += 16;
529 			ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr));
530 		}
531 
532 		ctrl.ptr[ctrl.used++] = file_ndx;
533 
534 		force_assert(sc->files);
535 		force_assert(sc->files->data == sce);
536 		force_assert(osize + 1 == splaytree_size(sc->files));
537 #endif
538 	}
539 
540 	sce->st = st;
541 	sce->stat_ts = srv->cur_ts;
542 
543 	/* catch the obvious symlinks
544 	 *
545 	 * this is not a secure check as we still have a race-condition between
546 	 * the stat() and the open. We can only solve this by
547 	 * 1. open() the file
548 	 * 2. fstat() the fd
549 	 *
550 	 * and keeping the file open for the rest of the time. But this can
551 	 * only be done at network level.
552 	 *
553 	 * per default it is not a symlink
554 	 * */
555 #ifdef HAVE_LSTAT
556 	sce->is_symlink = 0;
557 
558 	/* we want to only check for symlinks if we should block symlinks.
559 	 */
560 	if (!con->conf.follow_symlink) {
561 		if (stat_cache_lstat(srv, name, &lst)  == 0) {
562 #ifdef DEBUG_STAT_CACHE
563 				log_error_write(srv, __FILE__, __LINE__, "sb",
564 						"found symlink", name);
565 #endif
566 				sce->is_symlink = 1;
567 		}
568 
569 		/*
570 		 * we assume "/" can not be symlink, so
571 		 * skip the symlink stuff if our path is /
572 		 **/
573 		else if (buffer_string_length(name) > 1) {
574 			buffer *dname;
575 			char *s_cur;
576 
577 			dname = buffer_init();
578 			buffer_copy_buffer(dname, name);
579 
580 			while ((s_cur = strrchr(dname->ptr, '/'))) {
581 				buffer_string_set_length(dname, s_cur - dname->ptr);
582 				if (dname->ptr == s_cur) {
583 #ifdef DEBUG_STAT_CACHE
584 					log_error_write(srv, __FILE__, __LINE__, "s", "reached /");
585 #endif
586 					break;
587 				}
588 #ifdef DEBUG_STAT_CACHE
589 				log_error_write(srv, __FILE__, __LINE__, "sbs",
590 						"checking if", dname, "is a symlink");
591 #endif
592 				if (stat_cache_lstat(srv, dname, &lst)  == 0) {
593 					sce->is_symlink = 1;
594 #ifdef DEBUG_STAT_CACHE
595 					log_error_write(srv, __FILE__, __LINE__, "sb",
596 							"found symlink", dname);
597 #endif
598 					break;
599 				};
600 			};
601 			buffer_free(dname);
602 		};
603 	};
604 #endif
605 
606 	if (S_ISREG(st.st_mode)) {
607 		/* determine mimetype */
608 		buffer_reset(sce->content_type);
609 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
610 		if (con->conf.use_xattr) {
611 			stat_cache_attr_get(sce->content_type, name->ptr);
612 		}
613 #endif
614 		/* xattr did not set a content-type. ask the config */
615 		if (buffer_string_is_empty(sce->content_type)) {
616 			size_t namelen = buffer_string_length(name);
617 
618 			for (k = 0; k < con->conf.mimetypes->used; k++) {
619 				data_string *ds = (data_string *)con->conf.mimetypes->data[k];
620 				buffer *type = ds->key;
621 				size_t typelen = buffer_string_length(type);
622 
623 				if (buffer_is_empty(type)) continue;
624 
625 				/* check if the right side is the same */
626 				if (typelen > namelen) continue;
627 
628 				if (0 == strncasecmp(name->ptr + namelen - typelen, type->ptr, typelen)) {
629 					buffer_copy_buffer(sce->content_type, ds->value);
630 					break;
631 				}
632 			}
633 		}
634 		etag_create(sce->etag, &(sce->st), con->etag_flags);
635 	} else if (S_ISDIR(st.st_mode)) {
636 		etag_create(sce->etag, &(sce->st), con->etag_flags);
637 	}
638 
639 #ifdef HAVE_FAM_H
640 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
641 		/* is this directory already registered ? */
642 		if (!dir_node) {
643 			fam_dir = fam_dir_entry_init();
644 
645 			buffer_copy_buffer(fam_dir->name, sc->dir_name);
646 
647 			fam_dir->version = 1;
648 
649 			fam_dir->req = calloc(1, sizeof(FAMRequest));
650 
651 			if (0 != FAMMonitorDirectory(&sc->fam, fam_dir->name->ptr,
652 						     fam_dir->req, fam_dir)) {
653 
654 				log_error_write(srv, __FILE__, __LINE__, "sbsbs",
655 						"monitoring dir failed:",
656 						fam_dir->name,
657 						"file:", name,
658 						FamErrlist[FAMErrno]);
659 
660 				fam_dir_entry_free(&sc->fam, fam_dir);
661 				fam_dir = NULL;
662 			} else {
663 				int osize = 0;
664 
665 				if (sc->dirs) {
666 					osize = sc->dirs->size;
667 				}
668 
669 				sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir);
670 				force_assert(sc->dirs);
671 				force_assert(sc->dirs->data == fam_dir);
672 				force_assert(osize == (sc->dirs->size - 1));
673 			}
674 		} else {
675 			fam_dir = dir_node->data;
676 		}
677 
678 		/* bind the fam_fc to the stat() cache entry */
679 
680 		if (fam_dir) {
681 			sce->dir_version = fam_dir->version;
682 		}
683 	}
684 #endif
685 
686 	*ret_sce = sce;
687 
688 	return HANDLER_GO_ON;
689 }
690 
691 /**
692  * remove stat() from cache which havn't been stat()ed for
693  * more than 10 seconds
694  *
695  *
696  * walk though the stat-cache, collect the ids which are too old
697  * and remove them in a second loop
698  */
699 
700 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) {
701 	stat_cache_entry *sce;
702 
703 	if (!t) return 0;
704 
705 	stat_cache_tag_old_entries(srv, t->left, keys, ndx);
706 	stat_cache_tag_old_entries(srv, t->right, keys, ndx);
707 
708 	sce = t->data;
709 
710 	if (srv->cur_ts - sce->stat_ts > 2) {
711 		keys[(*ndx)++] = t->key;
712 	}
713 
714 	return 0;
715 }
716 
717 int stat_cache_trigger_cleanup(server *srv) {
718 	stat_cache *sc;
719 	size_t max_ndx = 0, i;
720 	int *keys;
721 
722 	sc = srv->stat_cache;
723 
724 	if (!sc->files) return 0;
725 
726 	keys = calloc(1, sizeof(int) * sc->files->size);
727 
728 	stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx);
729 
730 	for (i = 0; i < max_ndx; i++) {
731 		int ndx = keys[i];
732 		splay_tree *node;
733 
734 		sc->files = splaytree_splay(sc->files, ndx);
735 
736 		node = sc->files;
737 
738 		if (node && (node->key == ndx)) {
739 #ifdef DEBUG_STAT_CACHE
740 			size_t j;
741 			int osize = splaytree_size(sc->files);
742 			stat_cache_entry *sce = node->data;
743 #endif
744 			stat_cache_entry_free(node->data);
745 			sc->files = splaytree_delete(sc->files, ndx);
746 
747 #ifdef DEBUG_STAT_CACHE
748 			for (j = 0; j < ctrl.used; j++) {
749 				if (ctrl.ptr[j] == ndx) {
750 					ctrl.ptr[j] = ctrl.ptr[--ctrl.used];
751 					break;
752 				}
753 			}
754 
755 			force_assert(osize - 1 == splaytree_size(sc->files));
756 #endif
757 		}
758 	}
759 
760 	free(keys);
761 
762 	return 0;
763 }
764