1 #include "log.h"
2 #include "stat_cache.h"
3 #include "fdevent.h"
4 #include "etag.h"
5 
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <unistd.h>
13 #include <stdio.h>
14 #include <fcntl.h>
15 #include <assert.h>
16 
17 #ifdef HAVE_ATTR_ATTRIBUTES_H
18 # include <attr/attributes.h>
19 #endif
20 
21 #ifdef HAVE_FAM_H
22 # include <fam.h>
23 #endif
24 
25 #include "sys-mmap.h"
26 
27 /* NetBSD 1.3.x needs it */
28 #ifndef MAP_FAILED
29 # define MAP_FAILED -1
30 #endif
31 
32 #ifndef O_LARGEFILE
33 # define O_LARGEFILE 0
34 #endif
35 
36 #ifndef HAVE_LSTAT
37 # define lstat stat
38 #endif
39 
40 #if 0
41 /* enables debug code for testing if all nodes in the stat-cache as accessable */
42 #define DEBUG_STAT_CACHE
43 #endif
44 
45 /*
46  * stat-cache
47  *
48  * we cache the stat() calls in our own storage
49  * the directories are cached in FAM
50  *
51  * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
52  *
53  * if the stat()-cache is queried we check if the version id for the directory is the
54  * same and return immediatly.
55  *
56  *
57  * What we need:
58  *
59  * - for each stat-cache entry we need a fast indirect lookup on the directory name
60  * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
61  *
62  * stat <<-> directory <-> FAMRequest
63  *
64  * if file is deleted, directory is dirty, file is rechecked ...
65  * if directory is deleted, directory mapping is removed
66  *
67  * */
68 
69 #ifdef HAVE_FAM_H
70 typedef struct {
71 	FAMRequest *req;
72 	FAMConnection *fc;
73 
74 	buffer *name;
75 
76 	int version;
77 } fam_dir_entry;
78 #endif
79 
80 /* the directory name is too long to always compare on it
81  * - we need a hash
82  * - the hash-key is used as sorting criteria for a tree
83  * - a splay-tree is used as we can use the caching effect of it
84  */
85 
86 /* we want to cleanup the stat-cache every few seconds, let's say 10
87  *
88  * - remove entries which are outdated since 30s
89  * - remove entries which are fresh but havn't been used since 60s
90  * - if we don't have a stat-cache entry for a directory, release it from the monitor
91  */
92 
93 #ifdef DEBUG_STAT_CACHE
94 typedef struct {
95 	int *ptr;
96 
97 	size_t used;
98 	size_t size;
99 } fake_keys;
100 
101 static fake_keys ctrl;
102 #endif
103 
stat_cache_init(void)104 stat_cache *stat_cache_init(void) {
105 	stat_cache *fc = NULL;
106 
107 	fc = calloc(1, sizeof(*fc));
108 
109 	fc->dir_name = buffer_init();
110 	fc->hash_key = buffer_init();
111 #ifdef HAVE_FAM_H
112 	fc->fam = calloc(1, sizeof(*fc->fam));
113 #endif
114 
115 #ifdef DEBUG_STAT_CACHE
116 	ctrl.size = 0;
117 #endif
118 
119 	return fc;
120 }
121 
stat_cache_entry_init(void)122 static stat_cache_entry * stat_cache_entry_init(void) {
123 	stat_cache_entry *sce = NULL;
124 
125 	sce = calloc(1, sizeof(*sce));
126 
127 	sce->name = buffer_init();
128 	sce->etag = buffer_init();
129 	sce->content_type = buffer_init();
130 
131 	return sce;
132 }
133 
stat_cache_entry_free(void * data)134 static void stat_cache_entry_free(void *data) {
135 	stat_cache_entry *sce = data;
136 	if (!sce) return;
137 
138 	buffer_free(sce->etag);
139 	buffer_free(sce->name);
140 	buffer_free(sce->content_type);
141 
142 	free(sce);
143 }
144 
145 #ifdef HAVE_FAM_H
fam_dir_entry_init(void)146 static fam_dir_entry * fam_dir_entry_init(void) {
147 	fam_dir_entry *fam_dir = NULL;
148 
149 	fam_dir = calloc(1, sizeof(*fam_dir));
150 
151 	fam_dir->name = buffer_init();
152 
153 	return fam_dir;
154 }
155 
fam_dir_entry_free(void * data)156 static void fam_dir_entry_free(void *data) {
157 	fam_dir_entry *fam_dir = data;
158 
159 	if (!fam_dir) return;
160 
161 	FAMCancelMonitor(fam_dir->fc, fam_dir->req);
162 
163 	buffer_free(fam_dir->name);
164 	free(fam_dir->req);
165 
166 	free(fam_dir);
167 }
168 #endif
169 
stat_cache_free(stat_cache * sc)170 void stat_cache_free(stat_cache *sc) {
171 	while (sc->files) {
172 		int osize;
173 		splay_tree *node = sc->files;
174 
175 		osize = sc->files->size;
176 
177 		stat_cache_entry_free(node->data);
178 		sc->files = splaytree_delete(sc->files, node->key);
179 
180 		assert(osize - 1 == splaytree_size(sc->files));
181 	}
182 
183 	buffer_free(sc->dir_name);
184 	buffer_free(sc->hash_key);
185 
186 #ifdef HAVE_FAM_H
187 	while (sc->dirs) {
188 		int osize;
189 		splay_tree *node = sc->dirs;
190 
191 		osize = sc->dirs->size;
192 
193 		fam_dir_entry_free(node->data);
194 		sc->dirs = splaytree_delete(sc->dirs, node->key);
195 
196 		if (osize == 1) {
197 			assert(NULL == sc->dirs);
198 		} else {
199 			assert(osize == (sc->dirs->size + 1));
200 		}
201 	}
202 
203 	if (sc->fam) {
204 		FAMClose(sc->fam);
205 		free(sc->fam);
206 	}
207 #endif
208 	free(sc);
209 }
210 
211 #ifdef HAVE_XATTR
stat_cache_attr_get(buffer * buf,char * name)212 static int stat_cache_attr_get(buffer *buf, char *name) {
213 	int attrlen;
214 	int ret;
215 
216 	attrlen = 1024;
217 	buffer_prepare_copy(buf, attrlen);
218 	attrlen--;
219 	if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
220 		buf->used = attrlen + 1;
221 		buf->ptr[attrlen] = '\0';
222 	}
223 	return ret;
224 }
225 #endif
226 
227 /* the famous DJB hash function for strings */
hashme(buffer * str)228 static uint32_t hashme(buffer *str) {
229 	uint32_t hash = 5381;
230 	const char *s;
231 	for (s = str->ptr; *s; s++) {
232 		hash = ((hash << 5) + hash) + *s;
233 	}
234 
235 	hash &= ~(1 << 31); /* strip the highest bit */
236 
237 	return hash;
238 }
239 
240 #ifdef HAVE_FAM_H
stat_cache_handle_fdevent(server * srv,void * _fce,int revent)241 handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent) {
242 	size_t i;
243 	stat_cache *sc = srv->stat_cache;
244 	size_t events;
245 
246 	UNUSED(_fce);
247 	/* */
248 
249 	if ((revent & FDEVENT_IN) &&
250 	    sc->fam) {
251 
252 		events = FAMPending(sc->fam);
253 
254 		for (i = 0; i < events; i++) {
255 			FAMEvent fe;
256 			fam_dir_entry *fam_dir;
257 			splay_tree *node;
258 			int ndx, j;
259 
260 			FAMNextEvent(sc->fam, &fe);
261 
262 			/* handle event */
263 
264 			switch(fe.code) {
265 			case FAMChanged:
266 			case FAMDeleted:
267 			case FAMMoved:
268 				/* if the filename is a directory remove the entry */
269 
270 				fam_dir = fe.userdata;
271 				fam_dir->version++;
272 
273 				/* file/dir is still here */
274 				if (fe.code == FAMChanged) break;
275 
276 				/* we have 2 versions, follow and no-follow-symlink */
277 
278 				for (j = 0; j < 2; j++) {
279 					buffer_copy_string(sc->hash_key, fe.filename);
280 					buffer_append_long(sc->hash_key, j);
281 
282 					ndx = hashme(sc->hash_key);
283 
284 					sc->dirs = splaytree_splay(sc->dirs, ndx);
285 					node = sc->dirs;
286 
287 					if (node && (node->key == ndx)) {
288 						int osize = splaytree_size(sc->dirs);
289 
290 						fam_dir_entry_free(node->data);
291 						sc->dirs = splaytree_delete(sc->dirs, ndx);
292 
293 						assert(osize - 1 == splaytree_size(sc->dirs));
294 					}
295 				}
296 				break;
297 			default:
298 				break;
299 			}
300 		}
301 	}
302 
303 	if (revent & FDEVENT_HUP) {
304 		/* fam closed the connection */
305 		srv->stat_cache->fam_fcce_ndx = -1;
306 
307 		fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(sc->fam));
308 		fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(sc->fam));
309 
310 		FAMClose(sc->fam);
311 		free(sc->fam);
312 
313 		sc->fam = NULL;
314 	}
315 
316 	return HANDLER_GO_ON;
317 }
318 
buffer_copy_dirname(buffer * dst,buffer * file)319 static int buffer_copy_dirname(buffer *dst, buffer *file) {
320 	size_t i;
321 
322 	if (buffer_is_empty(file)) return -1;
323 
324 	for (i = file->used - 1; i+1 > 0; i--) {
325 		if (file->ptr[i] == '/') {
326 			buffer_copy_string_len(dst, file->ptr, i);
327 			return 0;
328 		}
329 	}
330 
331 	return -1;
332 }
333 #endif
334 
335 #ifdef HAVE_LSTAT
stat_cache_lstat(server * srv,buffer * dname,struct stat * lst)336 static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
337 	if (lstat(dname->ptr, lst) == 0) {
338 		return S_ISLNK(lst->st_mode) ? 0 : 1;
339 	}
340 	else {
341 		log_error_write(srv, __FILE__, __LINE__, "sbs",
342 				"lstat failed for:",
343 				dname, strerror(errno));
344 	};
345 	return -1;
346 }
347 #endif
348 
349 /***
350  *
351  *
352  *
353  * returns:
354  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
355  *  - HANDLER_ERROR on stat() failed -> see errno for problem
356  */
357 
stat_cache_get_entry(server * srv,connection * con,buffer * name,stat_cache_entry ** ret_sce)358 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
359 #ifdef HAVE_FAM_H
360 	fam_dir_entry *fam_dir = NULL;
361 	int dir_ndx = -1;
362 	splay_tree *dir_node = NULL;
363 #endif
364 	stat_cache_entry *sce = NULL;
365 	stat_cache *sc;
366 	struct stat st;
367 	size_t k;
368 	int fd;
369 	struct stat lst;
370 #ifdef DEBUG_STAT_CACHE
371 	size_t i;
372 #endif
373 
374 	int file_ndx;
375 	splay_tree *file_node = NULL;
376 
377 	*ret_sce = NULL;
378 
379 	/*
380 	 * check if the directory for this file has changed
381 	 */
382 
383 	sc = srv->stat_cache;
384 
385 	buffer_copy_string_buffer(sc->hash_key, name);
386 	buffer_append_long(sc->hash_key, con->conf.follow_symlink);
387 
388 	file_ndx = hashme(sc->hash_key);
389 	sc->files = splaytree_splay(sc->files, file_ndx);
390 
391 #ifdef DEBUG_STAT_CACHE
392 	for (i = 0; i < ctrl.used; i++) {
393 		if (ctrl.ptr[i] == file_ndx) break;
394 	}
395 #endif
396 
397 	if (sc->files && (sc->files->key == file_ndx)) {
398 #ifdef DEBUG_STAT_CACHE
399 		/* it was in the cache */
400 		assert(i < ctrl.used);
401 #endif
402 
403 		/* we have seen this file already and
404 		 * don't stat() it again in the same second */
405 
406 		file_node = sc->files;
407 
408 		sce = file_node->data;
409 
410 		/* check if the name is the same, we might have a collision */
411 
412 		if (buffer_is_equal(name, sce->name)) {
413 			if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
414 				if (sce->stat_ts == srv->cur_ts) {
415 					*ret_sce = sce;
416 					return HANDLER_GO_ON;
417 				}
418 			}
419 		} else {
420 			/* oops, a collision,
421 			 *
422 			 * file_node is used by the FAM check below to see if we know this file
423 			 * and if we can save a stat().
424 			 *
425 			 * BUT, the sce is not reset here as the entry into the cache is ok, we
426 			 * it is just not pointing to our requested file.
427 			 *
428 			 *  */
429 
430 			file_node = NULL;
431 		}
432 	} else {
433 #ifdef DEBUG_STAT_CACHE
434 		if (i != ctrl.used) {
435 			log_error_write(srv, __FILE__, __LINE__, "xSB",
436 				file_ndx, "was already inserted but not found in cache, ", name);
437 		}
438 		assert(i == ctrl.used);
439 #endif
440 	}
441 
442 #ifdef HAVE_FAM_H
443 	/* dir-check */
444 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
445 		if (0 != buffer_copy_dirname(sc->dir_name, name)) {
446 			log_error_write(srv, __FILE__, __LINE__, "sb",
447 				"no '/' found in filename:", name);
448 			return HANDLER_ERROR;
449 		}
450 
451 		buffer_copy_string_buffer(sc->hash_key, sc->dir_name);
452 		buffer_append_long(sc->hash_key, con->conf.follow_symlink);
453 
454 		dir_ndx = hashme(sc->hash_key);
455 
456 		sc->dirs = splaytree_splay(sc->dirs, dir_ndx);
457 
458 		if (sc->dirs && (sc->dirs->key == dir_ndx)) {
459 			dir_node = sc->dirs;
460 		}
461 
462 		if (dir_node && file_node) {
463 			/* we found a file */
464 
465 			sce = file_node->data;
466 			fam_dir = dir_node->data;
467 
468 			if (fam_dir->version == sce->dir_version) {
469 				/* the stat()-cache entry is still ok */
470 
471 				*ret_sce = sce;
472 				return HANDLER_GO_ON;
473 			}
474 		}
475 	}
476 #endif
477 
478 	/*
479 	 * *lol*
480 	 * - open() + fstat() on a named-pipe results in a (intended) hang.
481 	 * - stat() if regular file + open() to see if we can read from it is better
482 	 *
483 	 * */
484 	if (-1 == stat(name->ptr, &st)) {
485 		return HANDLER_ERROR;
486 	}
487 
488 
489 	if (S_ISREG(st.st_mode)) {
490 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
491 		if (name->ptr[name->used-2] == '/') {
492 			errno = ENOTDIR;
493 			return HANDLER_ERROR;
494 		}
495 
496 		/* try to open the file to check if we can read it */
497 		if (-1 == (fd = open(name->ptr, O_RDONLY))) {
498 			return HANDLER_ERROR;
499 		}
500 		close(fd);
501 	}
502 
503 	if (NULL == sce) {
504 #ifdef DEBUG_STAT_CACHE
505 		int osize = splaytree_size(sc->files);
506 #endif
507 
508 		sce = stat_cache_entry_init();
509 		buffer_copy_string_buffer(sce->name, name);
510 
511 		sc->files = splaytree_insert(sc->files, file_ndx, sce);
512 #ifdef DEBUG_STAT_CACHE
513 		if (ctrl.size == 0) {
514 			ctrl.size = 16;
515 			ctrl.used = 0;
516 			ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
517 		} else if (ctrl.size == ctrl.used) {
518 			ctrl.size += 16;
519 			ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr));
520 		}
521 
522 		ctrl.ptr[ctrl.used++] = file_ndx;
523 
524 		assert(sc->files);
525 		assert(sc->files->data == sce);
526 		assert(osize + 1 == splaytree_size(sc->files));
527 #endif
528 	}
529 
530 	sce->st = st;
531 	sce->stat_ts = srv->cur_ts;
532 
533 	/* catch the obvious symlinks
534 	 *
535 	 * this is not a secure check as we still have a race-condition between
536 	 * the stat() and the open. We can only solve this by
537 	 * 1. open() the file
538 	 * 2. fstat() the fd
539 	 *
540 	 * and keeping the file open for the rest of the time. But this can
541 	 * only be done at network level.
542 	 *
543 	 * per default it is not a symlink
544 	 * */
545 #ifdef HAVE_LSTAT
546 	sce->is_symlink = 0;
547 
548 	/* we want to only check for symlinks if we should block symlinks.
549 	 */
550 	if (!con->conf.follow_symlink) {
551 		if (stat_cache_lstat(srv, name, &lst)  == 0) {
552 #ifdef DEBUG_STAT_CACHE
553 				log_error_write(srv, __FILE__, __LINE__, "sb",
554 						"found symlink", name);
555 #endif
556 				sce->is_symlink = 1;
557 		}
558 
559 		/*
560 		 * we assume "/" can not be symlink, so
561 		 * skip the symlink stuff if our path is /
562 		 **/
563 		else if ((name->used > 2)) {
564 			buffer *dname;
565 			char *s_cur;
566 
567 			dname = buffer_init();
568 			buffer_copy_string_buffer(dname, name);
569 
570 			while ((s_cur = strrchr(dname->ptr,'/'))) {
571 				*s_cur = '\0';
572 				dname->used = s_cur - dname->ptr + 1;
573 				if (dname->ptr == s_cur) {
574 #ifdef DEBUG_STAT_CACHE
575 					log_error_write(srv, __FILE__, __LINE__, "s", "reached /");
576 #endif
577 					break;
578 				}
579 #ifdef DEBUG_STAT_CACHE
580 				log_error_write(srv, __FILE__, __LINE__, "sbs",
581 						"checking if", dname, "is a symlink");
582 #endif
583 				if (stat_cache_lstat(srv, dname, &lst)  == 0) {
584 					sce->is_symlink = 1;
585 #ifdef DEBUG_STAT_CACHE
586 					log_error_write(srv, __FILE__, __LINE__, "sb",
587 							"found symlink", dname);
588 #endif
589 					break;
590 				};
591 			};
592 			buffer_free(dname);
593 		};
594 	};
595 #endif
596 
597 	if (S_ISREG(st.st_mode)) {
598 		/* determine mimetype */
599 		buffer_reset(sce->content_type);
600 #ifdef HAVE_XATTR
601 		if (con->conf.use_xattr) {
602 			stat_cache_attr_get(sce->content_type, name->ptr);
603 		}
604 #endif
605 		/* xattr did not set a content-type. ask the config */
606 		if (buffer_is_empty(sce->content_type)) {
607 			for (k = 0; k < con->conf.mimetypes->used; k++) {
608 				data_string *ds = (data_string *)con->conf.mimetypes->data[k];
609 				buffer *type = ds->key;
610 
611 				if (type->used == 0) continue;
612 
613 				/* check if the right side is the same */
614 				if (type->used > name->used) continue;
615 
616 				if (0 == strncasecmp(name->ptr + name->used - type->used, type->ptr, type->used - 1)) {
617 					buffer_copy_string_buffer(sce->content_type, ds->value);
618 					break;
619 				}
620 			}
621 		}
622 		etag_create(sce->etag, &(sce->st), con->etag_flags);
623 	} else if (S_ISDIR(st.st_mode)) {
624 		etag_create(sce->etag, &(sce->st), con->etag_flags);
625 	}
626 
627 #ifdef HAVE_FAM_H
628 	if (sc->fam &&
629 	    (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) {
630 		/* is this directory already registered ? */
631 		if (!dir_node) {
632 			fam_dir = fam_dir_entry_init();
633 			fam_dir->fc = sc->fam;
634 
635 			buffer_copy_string_buffer(fam_dir->name, sc->dir_name);
636 
637 			fam_dir->version = 1;
638 
639 			fam_dir->req = calloc(1, sizeof(FAMRequest));
640 
641 			if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr,
642 						     fam_dir->req, fam_dir)) {
643 
644 				log_error_write(srv, __FILE__, __LINE__, "sbsbs",
645 						"monitoring dir failed:",
646 						fam_dir->name,
647 						"file:", name,
648 						FamErrlist[FAMErrno]);
649 
650 				fam_dir_entry_free(fam_dir);
651 			} else {
652 				int osize = 0;
653 
654 			       	if (sc->dirs) {
655 					osize = sc->dirs->size;
656 				}
657 
658 				sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir);
659 				assert(sc->dirs);
660 				assert(sc->dirs->data == fam_dir);
661 				assert(osize == (sc->dirs->size - 1));
662 			}
663 		} else {
664 			fam_dir = dir_node->data;
665 		}
666 
667 		/* bind the fam_fc to the stat() cache entry */
668 
669 		if (fam_dir) {
670 			sce->dir_version = fam_dir->version;
671 			sce->dir_ndx     = dir_ndx;
672 		}
673 	}
674 #endif
675 
676 	*ret_sce = sce;
677 
678 	return HANDLER_GO_ON;
679 }
680 
681 /**
682  * remove stat() from cache which havn't been stat()ed for
683  * more than 10 seconds
684  *
685  *
686  * walk though the stat-cache, collect the ids which are too old
687  * and remove them in a second loop
688  */
689 
stat_cache_tag_old_entries(server * srv,splay_tree * t,int * keys,size_t * ndx)690 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) {
691 	stat_cache_entry *sce;
692 
693 	if (!t) return 0;
694 
695 	stat_cache_tag_old_entries(srv, t->left, keys, ndx);
696 	stat_cache_tag_old_entries(srv, t->right, keys, ndx);
697 
698 	sce = t->data;
699 
700 	if (srv->cur_ts - sce->stat_ts > 2) {
701 		keys[(*ndx)++] = t->key;
702 	}
703 
704 	return 0;
705 }
706 
stat_cache_trigger_cleanup(server * srv)707 int stat_cache_trigger_cleanup(server *srv) {
708 	stat_cache *sc;
709 	size_t max_ndx = 0, i;
710 	int *keys;
711 
712 	sc = srv->stat_cache;
713 
714 	if (!sc->files) return 0;
715 
716 	keys = calloc(1, sizeof(size_t) * sc->files->size);
717 
718 	stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx);
719 
720 	for (i = 0; i < max_ndx; i++) {
721 		int ndx = keys[i];
722 		splay_tree *node;
723 
724 		sc->files = splaytree_splay(sc->files, ndx);
725 
726 		node = sc->files;
727 
728 		if (node && (node->key == ndx)) {
729 #ifdef DEBUG_STAT_CACHE
730 			size_t j;
731 			int osize = splaytree_size(sc->files);
732 			stat_cache_entry *sce = node->data;
733 #endif
734 			stat_cache_entry_free(node->data);
735 			sc->files = splaytree_delete(sc->files, ndx);
736 
737 #ifdef DEBUG_STAT_CACHE
738 			for (j = 0; j < ctrl.used; j++) {
739 				if (ctrl.ptr[j] == ndx) {
740 					ctrl.ptr[j] = ctrl.ptr[--ctrl.used];
741 					break;
742 				}
743 			}
744 
745 			assert(osize - 1 == splaytree_size(sc->files));
746 #endif
747 		}
748 	}
749 
750 	free(keys);
751 
752 	return 0;
753 }
754