xref: /lighttpd1.4/src/stat_cache.c (revision 4f0261e5)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "base.h"
5 #include "log.h"
6 #include "fdevent.h"
7 #include "etag.h"
8 #include "splaytree.h"
9 
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 
19 #ifdef HAVE_ATTR_ATTRIBUTES_H
20 # include <attr/attributes.h>
21 #endif
22 
23 #ifdef HAVE_SYS_EXTATTR_H
24 # include <sys/extattr.h>
25 #endif
26 
27 #ifndef HAVE_LSTAT
28 #define lstat stat
29 #ifndef S_ISLNK
30 #define S_ISLNK(mode) (0)
31 #endif
32 #endif
33 
34 /*
35  * stat-cache
36  *
37  * - a splay-tree is used as we can use the caching effect of it
38  */
39 
40 enum {
41   STAT_CACHE_ENGINE_UNSET,
42   STAT_CACHE_ENGINE_NONE,
43   STAT_CACHE_ENGINE_SIMPLE,
44   STAT_CACHE_ENGINE_FAM
45 };
46 
47 struct stat_cache_fam;  /* declaration */
48 
49 typedef struct stat_cache {
50 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
51 	struct stat_cache_fam *scf;
52 } stat_cache;
53 
54 
55 /* the famous DJB hash function for strings */
56 static uint32_t djbhash(const char *str, const size_t len)
57 {
58     const unsigned char * const s = (const unsigned char *)str;
59     uint32_t hash = 5381;
60     for (size_t i = 0; i < len; ++i) hash = ((hash << 5) + hash) ^ s[i];
61     return hash;
62 }
63 
64 
65 static uint32_t hashme(const char *str, const size_t len)
66 {
67     /* strip highest bit of hash value for splaytree */
68     return djbhash(str,len) & ~(((uint32_t)1) << 31);
69 }
70 
71 
72 static void * stat_cache_sptree_find(splay_tree ** const sptree,
73                                      const char * const name,
74                                      size_t len)
75 {
76     const int ndx = hashme(name, len);
77     *sptree = splaytree_splay(*sptree, ndx);
78     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
79 }
80 
81 
82 #ifdef HAVE_FAM_H
83 
84 /* monitor changes in directories using FAM
85  *
86  * This implementation employing FAM monitors directories as they are used,
87  * and maintains a reference count for cache use within stat_cache.c.
88  * A periodic job runs in lighttpd every 32 seconds, expiring entires unused
89  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
90  * within the cache are checked against the filesystem upon use if last stat()
91  * was greater than or equal to 16 seconds ago.
92  *
93  * This implementation does not monitor every directory in a tree, and therefore
94  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
95  * processing events from FAM might also lead to stale cache entries.
96  *
97  * For many websites, a large number of files are seldom, if ever, modified,
98  * and a common practice with images is to create a new file with a new name
99  * when a new version is needed, in order for client browsers and CDNs to better
100  * cache the content.  Given this, most use will see little difference in
101  * performance between server.stat-cache-engine = "fam" and "simple" (default).
102  * The default server.stat-cache-engine = "simple" calls stat() on a target once
103  * per second, and reuses that information until the next second.  For use where
104  * changes must be immediately visible, server.stat-cache-engine = "disable"
105  * should be used.
106  *
107  * When considering use of server.stat-cache-engine = "fam", there are a few
108  * additional limitations for this cache implementation using FAM.
109  * - symlinks to files located outside of the current directory do not result
110  *   in changes to that file being monitored (unless that file is in a directory
111  *   which is monitored as a result of a different request).  symlinks can be
112  *   chained and can be circular.  This implementation *does not* readlink() or
113  *   realpath() to resolve the chains to find and monitor the ultimate target
114  *   directory.  While symlinks to files located outside the current directory
115  *   are not monitored, symlinks to directories *are* monitored, though chains
116  *   of symlinks to directories do not result in monitoring of the directories
117  *   containing intermediate symlinks to the target directory.
118  * - directory rename of a directory which is not currently being monitored will
119  *   result in stale information in the cache if there is a subdirectory that is
120  *   being monitored.
121  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
122  * does re-validate the information in the cache upon use if the cache entry has
123  * not been checked in 16 seconds, so that is the upper limit for use of stale
124  * data.
125  *
126  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
127  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
128  * to the overhead of processing the additional noise generated from changes.
129  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
130  * untrusted files where a malicious user could generate an excess of change
131  * events.
132  *
133  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
134  * event is received for a directory (or symlink to a directory) which has been
135  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
136  * changes of large directories trees where there have been a large number of
137  * different files recently accessed and part of the stat_cache.
138  */
139 
140 #include <fam.h>
141 
142 typedef struct fam_dir_entry {
143 	buffer *name;
144 	int refcnt;
145 	FAMRequest req;
146 	time_t stat_ts;
147 	dev_t st_dev;
148 	ino_t st_ino;
149 	struct fam_dir_entry *fam_parent;
150 } fam_dir_entry;
151 
152 typedef struct stat_cache_fam {
153 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
154 	FAMConnection fam;
155 	fdnode *fdn;
156 	int fd;
157 } stat_cache_fam;
158 
159 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
160 {
161     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
162     force_assert(NULL != fam_dir);
163 
164     fam_dir->name = buffer_init();
165     buffer_copy_string_len(fam_dir->name, name, len);
166     fam_dir->refcnt = 0;
167 
168     return fam_dir;
169 }
170 
171 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
172 {
173     if (!fam_dir) return;
174     /*(fam_dir->parent might be invalid pointer here; ignore)*/
175     buffer_free(fam_dir->name);
176     free(fam_dir);
177 }
178 
179 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
180 {
181     fam_dir->stat_ts = 0;
182     if (fam_dir->fam_parent) {
183         --fam_dir->fam_parent->refcnt;
184         fam_dir->fam_parent = NULL;
185     }
186 }
187 
188 /*
189  * walk though splay_tree and collect contents of dir tree.
190  * remove tagged entries in a second loop
191  */
192 
193 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
194 {
195     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
196     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
197     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
198     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
199 
200     fam_dir_entry * const fam_dir = t->data;
201     if (0 == fam_dir->refcnt) {
202         fam_dir_invalidate_node(fam_dir);
203         keys[(*ndx)++] = t->key;
204     }
205 }
206 
207 static void fam_dir_periodic_cleanup(server *srv) {
208     int max_ndx, i;
209     int keys[8192]; /* 32k size on stack */
210     stat_cache_fam * const scf = srv->stat_cache->scf;
211     do {
212         if (!scf->dirs) return;
213         max_ndx = 0;
214         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
215         for (i = 0; i < max_ndx; ++i) {
216             const int ndx = keys[i];
217             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
218             if (node && node->key == ndx) {
219                 fam_dir_entry *fam_dir = node->data;
220                 scf->dirs = splaytree_delete(scf->dirs, ndx);
221                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
222                 fam_dir_entry_free(fam_dir);
223             }
224         }
225     } while (max_ndx == sizeof(keys)/sizeof(int));
226 }
227 
228 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
229 {
230     /*force_assert(t);*/
231     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
232     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
233 
234     fam_dir_entry * const fam_dir = t->data;
235     buffer *b = fam_dir->name;
236     size_t blen = buffer_string_length(b);
237     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
238         fam_dir_invalidate_node(fam_dir);
239 }
240 
241 /* declarations */
242 static void stat_cache_delete_tree(server *srv, const char *name, size_t len);
243 static void stat_cache_invalidate_dir_tree(server *srv, const char *name, size_t len);
244 
245 static void stat_cache_handle_fdevent_in(server *srv, stat_cache_fam *scf)
246 {
247     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
248         FAMEvent fe;
249         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
250 
251         /* ignore events which may have been pending for
252          * paths recently cancelled via FAMCancelMonitor() */
253         ndx = (int)(intptr_t)fe.userdata;
254         scf->dirs = splaytree_splay(scf->dirs, ndx);
255         if (!scf->dirs || scf->dirs->key != ndx) {
256             continue;
257         }
258         fam_dir_entry *fam_dir = scf->dirs->data;
259         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
260             != FAMREQUEST_GETREQNUM(&fe.fr)) {
261             continue;
262         }
263 
264         if (fe.filename[0] != '/') {
265             buffer * const n = fam_dir->name;
266             fam_dir_entry *fam_link;
267             size_t len;
268             switch(fe.code) {
269             case FAMCreated:
270                 /* file created in monitored dir modifies dir and
271                  * we should get a separate FAMChanged event for dir.
272                  * Therefore, ignore file FAMCreated event here.
273                  * Also, if FAMNoExists() is used, might get spurious
274                  * FAMCreated events as changes are made e.g. in monitored
275                  * sub-sub-sub dirs and the library discovers new (already
276                  * existing) dir entries */
277                 continue;
278             case FAMChanged:
279                 /* file changed in monitored dir does not modify dir */
280             case FAMDeleted:
281             case FAMMoved:
282                 /* file deleted or moved in monitored dir modifies dir,
283                  * but FAM provides separate notification for that */
284 
285                 /* temporarily append filename to dir in fam_dir->name to
286                  * construct path, then delete stat_cache entry (if any)*/
287                 len = buffer_string_length(n);
288                 buffer_append_string_len(n, CONST_STR_LEN("/"));
289                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
290                 /* (alternatively, could chose to stat() and update)*/
291                 stat_cache_invalidate_entry(srv, CONST_BUF_LEN(n));
292 
293                 fam_link = /*(check if might be symlink to monitored dir)*/
294                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
295                 if (fam_link && !buffer_is_equal(fam_link->name, n))
296                     fam_link = NULL;
297 
298                 buffer_string_set_length(n, len);
299 
300                 if (fam_link) {
301                     /* replaced symlink changes containing dir */
302                     stat_cache_invalidate_entry(srv, CONST_BUF_LEN(n));
303                     /* handle symlink to dir as deleted dir below */
304                     fe.code = FAMDeleted;
305                     fam_dir = fam_link;
306                     break;
307                 }
308                 continue;
309             default:
310                 continue;
311             }
312         }
313 
314         switch(fe.code) {
315         case FAMChanged:
316             stat_cache_invalidate_entry(srv, CONST_BUF_LEN(fam_dir->name));
317             break;
318         case FAMDeleted:
319         case FAMMoved:
320             stat_cache_delete_tree(srv, CONST_BUF_LEN(fam_dir->name));
321             fam_dir_invalidate_node(fam_dir);
322             if (scf->dirs)
323                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
324             fam_dir_periodic_cleanup(srv);
325             break;
326         default:
327             break;
328         }
329     }
330 }
331 
332 static handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent)
333 {
334 	stat_cache_fam *scf = srv->stat_cache->scf;
335 	UNUSED(_fce);
336 
337 	if (revent & FDEVENT_IN) {
338 		stat_cache_handle_fdevent_in(srv, scf);
339 	}
340 
341 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
342 		/* fam closed the connection */
343 		fdevent_fdnode_event_del(srv->ev, scf->fdn);
344 		fdevent_unregister(srv->ev, scf->fd);
345 		scf->fdn = NULL;
346 
347 		FAMClose(&scf->fam);
348 		scf->fd = -1;
349 	}
350 
351 	return HANDLER_GO_ON;
352 }
353 
354 static stat_cache_fam * stat_cache_init_fam(server *srv) {
355 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
356 	force_assert(scf);
357 	scf->fd = -1;
358 
359 	/* setup FAM */
360 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
361 		log_error_write(srv, __FILE__, __LINE__, "s",
362 				"could not open a fam connection, dieing.");
363 		return NULL;
364 	}
365       #ifdef HAVE_FAMNOEXISTS
366 	FAMNoExists(&scf->fam);
367       #endif
368 
369 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
370 	fdevent_setfd_cloexec(scf->fd);
371 	scf->fdn = fdevent_register(srv->ev, scf->fd, stat_cache_handle_fdevent, NULL);
372 	fdevent_fdnode_event_set(srv->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
373 
374 	return scf;
375 }
376 
377 static void stat_cache_free_fam(stat_cache_fam *scf) {
378 	if (NULL == scf) return;
379 
380 	while (scf->dirs) {
381 		/*(skip entry invalidation and FAMCancelMonitor())*/
382 		splay_tree *node = scf->dirs;
383 		fam_dir_entry_free((fam_dir_entry *)node->data);
384 		scf->dirs = splaytree_delete(scf->dirs, node->key);
385 	}
386 
387 	if (-1 != scf->fd) {
388 		/*scf->fdn already cleaned up in fdevent_free()*/
389 		FAMClose(&scf->fam);
390 		/*scf->fd = -1;*/
391 	}
392 
393 	free(scf);
394 }
395 
396 static fam_dir_entry * fam_dir_monitor(server *srv, stat_cache_fam *scf, char *fn, size_t dirlen, struct stat *st)
397 {
398     const int fn_is_dir = S_ISDIR(st->st_mode);
399     /*force_assert(0 != dirlen);*/
400     /*force_assert(fn[0] == '/');*/
401     /* consistency: ensure fn does not end in '/' unless root "/"
402      * FAM events will not end in '/', so easier to match this way */
403     if (fn[dirlen-1] == '/') --dirlen;
404     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
405     /* Note: paths are expected to be normalized before calling stat_cache,
406      * e.g. without repeated '/' */
407     if (!fn_is_dir) {
408         while (fn[--dirlen] != '/') ;
409         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
410     }
411     int dir_ndx = hashme(fn, dirlen);
412     fam_dir_entry *fam_dir = NULL;
413 
414     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
415     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
416         fam_dir = scf->dirs->data;
417         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
418             /* hash collision; preserve existing
419              * do not monitor new to avoid cache thrashing */
420             return NULL;
421         }
422         /* directory already registered */
423     }
424 
425     struct stat lst;
426     int ck_dir = fn_is_dir;
427     if (!fn_is_dir && (NULL==fam_dir || srv->cur_ts - fam_dir->stat_ts >= 16)) {
428         ck_dir = 1;
429         /*(temporarily modify fn)*/
430         fn[dirlen] = '\0';
431         if (0 != lstat(fn, &lst)) {
432             fn[dirlen] = '/';
433             return NULL;
434         }
435         if (!S_ISLNK(lst.st_mode)) {
436             st = &lst;
437         }
438         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
439             fn[dirlen] = '/';
440             return NULL;
441         }
442         fn[dirlen] = '/';
443     }
444 
445     int ck_lnk = (NULL == fam_dir);
446     if (ck_dir && NULL != fam_dir) {
447         /* check stat() matches device and inode, just in case an external event
448          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
449         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
450             ck_lnk = 1;
451             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
452              * fam_dir is not NULL and so splaytree_insert not called below)*/
453             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
454             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
455                 stat_cache_update_entry(srv, fn, dirlen, st, NULL);
456             /*(must not delete tree since caller is holding a valid node)*/
457             stat_cache_invalidate_dir_tree(srv, fn, dirlen);
458             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
459                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
460                                             &fam_dir->req,
461                                             (void *)(intptr_t)dir_ndx)) {
462                 fam_dir->stat_ts = 0; /* invalidate */
463                 return NULL;
464             }
465             fam_dir->st_dev = st->st_dev;
466             fam_dir->st_ino = st->st_ino;
467         }
468         fam_dir->stat_ts = srv->cur_ts;
469     }
470 
471     if (NULL == fam_dir) {
472         fam_dir = fam_dir_entry_init(fn, dirlen);
473 
474         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
475                                      (void *)(intptr_t)dir_ndx)) {
476             log_error_write(srv, __FILE__, __LINE__, "sbsss",
477                     "monitoring dir failed:",
478                     fam_dir->name,
479                     "file:", fn,
480                     FamErrlist[FAMErrno]);
481             fam_dir_entry_free(fam_dir);
482             return NULL;
483         }
484 
485         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
486         fam_dir->stat_ts= srv->cur_ts;
487         fam_dir->st_dev = st->st_dev;
488         fam_dir->st_ino = st->st_ino;
489     }
490 
491     if (ck_lnk) {
492         if (fn_is_dir) {
493             /*(temporarily modify fn)*/
494             char e = fn[dirlen];
495             fn[dirlen] = '\0';
496             if (0 != lstat(fn, &lst)) {
497                 fn[dirlen] = e;
498                 return NULL;
499             }
500             fn[dirlen] = e;
501         }
502         if (fam_dir->fam_parent) {
503             --fam_dir->fam_parent->refcnt;
504             fam_dir->fam_parent = NULL;
505         }
506         if (S_ISLNK(lst.st_mode)) {
507             fam_dir->fam_parent = fam_dir_monitor(srv, scf, fn, dirlen, &lst);
508         }
509     }
510 
511     ++fam_dir->refcnt;
512     return fam_dir;
513 }
514 
515 #endif
516 
517 
518 stat_cache *stat_cache_init(server *srv) {
519 	stat_cache *sc = NULL;
520 	UNUSED(srv);
521 
522 	sc = calloc(1, sizeof(*sc));
523 	force_assert(NULL != sc);
524 
525 #ifdef HAVE_FAM_H
526 	if (STAT_CACHE_ENGINE_FAM == srv->srvconf.stat_cache_engine) {
527 		sc->scf = stat_cache_init_fam(srv);
528 		if (NULL == sc->scf) {
529 			free(sc);
530 			return NULL;
531 		}
532 	}
533 #endif
534 
535 	return sc;
536 }
537 
538 static stat_cache_entry * stat_cache_entry_init(void) {
539 	stat_cache_entry *sce = NULL;
540 
541 	sce = calloc(1, sizeof(*sce));
542 	force_assert(NULL != sce);
543 
544 	sce->name = buffer_init();
545 	sce->etag = buffer_init();
546 	sce->content_type = buffer_init();
547 
548 	return sce;
549 }
550 
551 static void stat_cache_entry_free(void *data) {
552 	stat_cache_entry *sce = data;
553 	if (!sce) return;
554 
555       #ifdef HAVE_FAM_H
556 	/*(decrement refcnt only;
557 	 * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
558 	if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
559       #endif
560 
561 	buffer_free(sce->etag);
562 	buffer_free(sce->name);
563 	buffer_free(sce->content_type);
564 
565 	free(sce);
566 }
567 
568 void stat_cache_free(stat_cache *sc) {
569 	while (sc->files) {
570 		splay_tree *node = sc->files;
571 		stat_cache_entry_free(node->data);
572 		sc->files = splaytree_delete(sc->files, node->key);
573 	}
574 
575 #ifdef HAVE_FAM_H
576 	stat_cache_free_fam(sc->scf);
577 #endif
578 	free(sc);
579 }
580 
581 int stat_cache_choose_engine (server *srv, const buffer *stat_cache_string) {
582 	if (buffer_string_is_empty(stat_cache_string)) {
583 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
584 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("simple"))) {
585 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
586 #ifdef HAVE_FAM_H
587 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("fam"))) {
588 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
589 #endif
590 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("disable"))) {
591 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
592 	} else {
593 		log_error_write(srv, __FILE__, __LINE__, "sb",
594 				"server.stat-cache-engine can be one of \"disable\", \"simple\","
595 #ifdef HAVE_FAM_H
596 				" \"fam\","
597 #endif
598 				" but not:", stat_cache_string);
599 		return -1;
600 	}
601 	return 0;
602 }
603 
604 #if defined(HAVE_XATTR)
605 static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
606 	int attrlen;
607 	int ret;
608 
609 	buffer_string_prepare_copy(buf, 1023);
610 	attrlen = buf->size - 1;
611 	if(0 == (ret = attr_get(name, xattrname, buf->ptr, &attrlen, 0))) {
612 		buffer_commit(buf, attrlen);
613 	}
614 	return ret;
615 }
616 #elif defined(HAVE_EXTATTR)
617 static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
618 	ssize_t attrlen;
619 
620 	buffer_string_prepare_copy(buf, 1023);
621 
622 	if (-1 != (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, xattrname, buf->ptr, buf->size - 1))) {
623 		buf->used = attrlen + 1;
624 		buf->ptr[attrlen] = '\0';
625 		return 0;
626 	}
627 	return -1;
628 }
629 #endif
630 
631 const buffer * stat_cache_mimetype_by_ext(const connection *con, const char *name, size_t nlen)
632 {
633     const char *end = name + nlen; /*(end of string)*/
634     const size_t used = con->conf.mimetypes->used;
635     if (used < 16) {
636         for (size_t i = 0; i < used; ++i) {
637             /* suffix match */
638             const data_string *ds = (data_string *)con->conf.mimetypes->data[i];
639             const size_t klen = buffer_string_length(ds->key);
640             if (klen <= nlen && 0 == strncasecmp(end-klen, ds->key->ptr, klen))
641                 return ds->value;
642         }
643     }
644     else {
645         const char *s;
646         const data_string *ds;
647         if (nlen) {
648             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
649             if (*s == '/') ++s;
650         }
651         else {
652             s = name;
653         }
654         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
655         ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
656         if (NULL != ds) return ds->value;
657         while (++s < end) {
658             while (*s != '.' && ++s != end) ;
659             if (s == end) break;
660             /* search ".ext" then "ext" */
661             ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
662             if (NULL != ds) return ds->value;
663             /* repeat search without leading '.' to handle situation where
664              * admin configured mimetype.assign keys without leading '.' */
665             if (++s < end) {
666                 if (*s == '.') { --s; continue; }
667                 ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
668                 if (NULL != ds) return ds->value;
669             }
670         }
671         /* search for ""; catchall */
672         ds = (data_string *)array_get_element(con->conf.mimetypes, "");
673         if (NULL != ds) return ds->value;
674     }
675 
676     return NULL;
677 }
678 
679 const buffer * stat_cache_content_type_get(server *srv, connection *con, const buffer *name, stat_cache_entry *sce)
680 {
681     /*(invalid caching if user config has multiple, different
682      * con->conf.mimetypes for same extension (not expected))*/
683     if (!buffer_string_is_empty(sce->content_type)) return sce->content_type;
684 
685     if (S_ISREG(sce->st.st_mode)) {
686         /* determine mimetype */
687         buffer_clear(sce->content_type);
688       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
689         if (con->conf.use_xattr) {
690             stat_cache_attr_get(sce->content_type, name->ptr, srv->srvconf.xattr_name->ptr);
691         }
692       #else
693         UNUSED(srv);
694       #endif
695         /* xattr did not set a content-type. ask the config */
696         if (buffer_string_is_empty(sce->content_type)) {
697             const buffer *type = stat_cache_mimetype_by_ext(con, CONST_BUF_LEN(name));
698             if (NULL != type) {
699                 buffer_copy_buffer(sce->content_type, type);
700             }
701         }
702         return sce->content_type;
703     }
704 
705     return NULL;
706 }
707 
708 const buffer * stat_cache_etag_get(stat_cache_entry *sce, etag_flags_t flags) {
709     /*(invalid caching if user config has multiple, different con->etag_flags
710      * for same path (not expected, since etag flags should be by filesystem))*/
711     if (!buffer_string_is_empty(sce->etag)) return sce->etag;
712 
713     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
714         etag_create(sce->etag, &sce->st, flags);
715         return sce->etag;
716     }
717 
718     return NULL;
719 }
720 
721 void stat_cache_update_entry(server *srv, const char *name, size_t len,
722                              struct stat *st, buffer *etagb)
723 {
724     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
725     force_assert(0 != len);
726     if (name[len-1] == '/') { if (0 == --len) len = 1; }
727     splay_tree **sptree = &srv->stat_cache->files;
728     stat_cache_entry *sce =
729       stat_cache_sptree_find(sptree, name, len);
730     if (sce && buffer_is_equal_string(sce->name, name, len)) {
731         sce->stat_ts = srv->cur_ts;
732         sce->st = *st; /* etagb might be NULL to clear etag (invalidate) */
733         buffer_copy_string_len(sce->etag, CONST_BUF_LEN(etagb));
734       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
735         buffer_clear(sce->content_type);
736       #endif
737     }
738 }
739 
740 void stat_cache_delete_entry(server *srv, const char *name, size_t len)
741 {
742     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
743     force_assert(0 != len);
744     if (name[len-1] == '/') { if (0 == --len) len = 1; }
745     splay_tree **sptree = &srv->stat_cache->files;
746     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
747     if (sce && buffer_is_equal_string(sce->name, name, len)) {
748         stat_cache_entry_free(sce);
749         *sptree = splaytree_delete(*sptree, (*sptree)->key);
750     }
751 }
752 
753 void stat_cache_invalidate_entry(server *srv, const char *name, size_t len)
754 {
755     splay_tree **sptree = &srv->stat_cache->files;
756     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
757     if (sce && buffer_is_equal_string(sce->name, name, len)) {
758         sce->stat_ts = 0;
759       #ifdef HAVE_FAM_H
760         if (sce->fam_dir != NULL) {
761             --((fam_dir_entry *)sce->fam_dir)->refcnt;
762             sce->fam_dir = NULL;
763         }
764       #endif
765     }
766 }
767 
768 #ifdef HAVE_FAM_H
769 
770 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
771                                                 const char *name, size_t len)
772 {
773     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
774     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
775 
776     buffer *b = ((stat_cache_entry *)t->data)->name;
777     size_t blen = buffer_string_length(b);
778     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
779         stat_cache_entry *sce = t->data;
780         sce->stat_ts = 0;
781       #ifdef HAVE_FAM_H
782         if (sce->fam_dir != NULL) {
783             --((fam_dir_entry *)sce->fam_dir)->refcnt;
784             sce->fam_dir = NULL;
785         }
786       #endif
787     }
788 }
789 
790 static void stat_cache_invalidate_dir_tree(server *srv,
791                                            const char *name, size_t len)
792 {
793     splay_tree *sptree = srv->stat_cache->files;
794     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
795 }
796 
797 #endif
798 
799 /*
800  * walk though splay_tree and collect contents of dir tree.
801  * remove tagged entries in a second loop
802  */
803 
804 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
805                                     int *keys, int *ndx)
806 {
807     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
808     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
809     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
810     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
811 
812     buffer *b = ((stat_cache_entry *)t->data)->name;
813     size_t blen = buffer_string_length(b);
814     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
815         keys[(*ndx)++] = t->key;
816 }
817 
818 static void stat_cache_prune_dir_tree(stat_cache * const sc,
819                                       const char *name, size_t len)
820 {
821     int max_ndx, i;
822     int keys[8192]; /* 32k size on stack */
823     do {
824         if (!sc->files) return;
825         max_ndx = 0;
826         stat_cache_tag_dir_tree(sc->files, name, len, keys, &max_ndx);
827         for (i = 0; i < max_ndx; ++i) {
828             const int ndx = keys[i];
829             splay_tree *node = sc->files = splaytree_splay(sc->files, ndx);
830             if (node && node->key == ndx) {
831                 stat_cache_entry_free(node->data);
832                 sc->files = splaytree_delete(sc->files, ndx);
833             }
834         }
835     } while (max_ndx == sizeof(keys)/sizeof(int));
836 }
837 
838 static void stat_cache_delete_tree(server *srv, const char *name, size_t len)
839 {
840     stat_cache_delete_entry(srv, name, len);
841     stat_cache_prune_dir_tree(srv->stat_cache, name, len);
842 }
843 
844 void stat_cache_delete_dir(server *srv, const char *name, size_t len)
845 {
846     force_assert(0 != len);
847     if (name[len-1] == '/') { if (0 == --len) len = 1; }
848     stat_cache_delete_tree(srv, name, len);
849   #ifdef HAVE_FAM_H
850     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
851         splay_tree **sptree = &srv->stat_cache->scf->dirs;
852         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
853         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
854             fam_dir_invalidate_node(fam_dir);
855         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
856         fam_dir_periodic_cleanup(srv);
857     }
858   #endif
859 }
860 
861 /***
862  *
863  *
864  *
865  * returns:
866  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
867  *  - HANDLER_ERROR on stat() failed -> see errno for problem
868  */
869 
870 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
871 	stat_cache_entry *sce = NULL;
872 	stat_cache *sc;
873 	struct stat st;
874 	int file_ndx;
875 	UNUSED(con);
876 
877 	*ret_sce = NULL;
878 
879 	/* consistency: ensure lookup name does not end in '/' unless root "/"
880 	 * (but use full path given with stat(), even with trailing '/') */
881 	int final_slash = 0;
882 	size_t len = buffer_string_length(name);
883 	force_assert(0 != len);
884 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
885 	/* Note: paths are expected to be normalized before calling stat_cache,
886 	 * e.g. without repeated '/' */
887 
888 	if (name->ptr[0] != '/') return HANDLER_ERROR;
889 
890 	/*
891 	 * check if the directory for this file has changed
892 	 */
893 
894 	sc = srv->stat_cache;
895 
896 	file_ndx = hashme(name->ptr, len);
897 	sc->files = splaytree_splay(sc->files, file_ndx);
898 
899 	if (sc->files && (sc->files->key == file_ndx)) {
900 		/* we have seen this file already and
901 		 * don't stat() it again in the same second */
902 
903 		sce = sc->files->data;
904 
905 		/* check if the name is the same, we might have a collision */
906 
907 		if (buffer_is_equal_string(sce->name, name->ptr, len)) {
908 			if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
909 				if (sce->stat_ts == srv->cur_ts) {
910 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
911 						errno = ENOTDIR;
912 						return HANDLER_ERROR;
913 					}
914 					*ret_sce = sce;
915 					return HANDLER_GO_ON;
916 				}
917 			}
918 		      #ifdef HAVE_FAM_H
919 			else if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM
920 				 && sce->fam_dir) { /* entry is in monitored dir */
921 				/* re-stat() periodically, even if monitoring for changes
922 				 * (due to limitations in stat_cache.c use of FAM)
923 				 * (gaps due to not continually monitoring an entire tree) */
924 				if (srv->cur_ts - sce->stat_ts < 16) {
925 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
926 						errno = ENOTDIR;
927 						return HANDLER_ERROR;
928 					}
929 					*ret_sce = sce;
930 					return HANDLER_GO_ON;
931 				}
932 			}
933 		      #endif
934 		} else {
935 			/* collision, forget about the entry */
936 			sce = NULL;
937 		}
938 	}
939 
940 	if (-1 == stat(name->ptr, &st)) {
941 		return HANDLER_ERROR;
942 	}
943 
944 	if (S_ISREG(st.st_mode)) {
945 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
946 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
947 			errno = ENOTDIR;
948 			return HANDLER_ERROR;
949 		}
950 	}
951 
952 	if (NULL == sce) {
953 
954 		sce = stat_cache_entry_init();
955 		buffer_copy_string_len(sce->name, name->ptr, len);
956 
957 		/* already splayed file_ndx */
958 		if ((NULL != sc->files) && (sc->files->key == file_ndx)) {
959 			/* hash collision: replace old entry */
960 			stat_cache_entry_free(sc->files->data);
961 			sc->files->data = sce;
962 		} else {
963 			sc->files = splaytree_insert(sc->files, file_ndx, sce);
964 		}
965 
966 	} else {
967 
968 		buffer_clear(sce->etag);
969 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
970 		buffer_clear(sce->content_type);
971 	      #endif
972 
973 	}
974 
975 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
976 
977 #ifdef HAVE_FAM_H
978 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
979 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
980 		sce->fam_dir =
981 		  fam_dir_monitor(srv, sc->scf, CONST_BUF_LEN(name), &st);
982 	      #if 0 /*(performed below)*/
983 		if (NULL != sce->fam_dir) {
984 			/*(may have been invalidated by dir change)*/
985 			sce->stat_ts = srv->cur_ts;
986 		}
987 	      #endif
988 	}
989 #endif
990 
991 	sce->stat_ts = srv->cur_ts;
992 	*ret_sce = sce;
993 
994 	return HANDLER_GO_ON;
995 }
996 
997 int stat_cache_path_contains_symlink(server *srv, buffer *name) {
998     /* caller should check for symlinks only if we should block symlinks. */
999 
1000     /* catch the obvious symlinks
1001      *
1002      * this is not a secure check as we still have a race-condition between
1003      * the stat() and the open. We can only solve this by
1004      * 1. open() the file
1005      * 2. fstat() the fd
1006      *
1007      * and keeping the file open for the rest of the time. But this can
1008      * only be done at network level.
1009      * */
1010 
1011   #ifdef HAVE_LSTAT
1012     /* we assume "/" can not be symlink,
1013      * so skip the symlink stuff if path is "/" */
1014     size_t len = buffer_string_length(name);
1015     force_assert(0 != len);
1016     force_assert(name->ptr[0] == '/');
1017     if (1 == len) return 0;
1018    #ifndef PATH_MAX
1019    #define PATH_MAX 4096
1020    #endif
1021     if (len >= PATH_MAX) return -1;
1022 
1023     char buf[PATH_MAX];
1024     memcpy(buf, name->ptr, len);
1025     char *s_cur = buf+len;
1026     do {
1027         *s_cur = '\0';
1028         struct stat st;
1029         if (0 == lstat(buf, &st)) {
1030             if (S_ISLNK(st.st_mode)) return 1;
1031         }
1032         else {
1033             log_error_write(srv, __FILE__, __LINE__, "sss",
1034                             "lstat failed for:", buf, strerror(errno));
1035             return -1;
1036         }
1037     } while ((s_cur = strrchr(buf, '/')) != buf);
1038   #endif
1039 
1040     return 0;
1041 }
1042 
1043 int stat_cache_open_rdonly_fstat (buffer *name, struct stat *st, int symlinks) {
1044 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1045 	 * not any intermediate symlinks along the path)*/
1046 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1047 	if (fd >= 0) {
1048 		if (0 == fstat(fd, st)) {
1049 			return fd;
1050 		} else {
1051 			close(fd);
1052 		}
1053 	}
1054 	return -1;
1055 }
1056 
1057 /**
1058  * remove stat() from cache which haven't been stat()ed for
1059  * more than 2 seconds
1060  *
1061  *
1062  * walk though the stat-cache, collect the ids which are too old
1063  * and remove them in a second loop
1064  */
1065 
1066 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx, time_t max_age) {
1067 	stat_cache_entry *sce;
1068 
1069 	if (!t) return 0;
1070 
1071 	stat_cache_tag_old_entries(srv, t->left, keys, ndx, max_age);
1072 	stat_cache_tag_old_entries(srv, t->right, keys, ndx, max_age);
1073 
1074 	sce = t->data;
1075 
1076 	if (srv->cur_ts - sce->stat_ts > max_age) {
1077 		keys[(*ndx)++] = t->key;
1078 	}
1079 
1080 	return 0;
1081 }
1082 
1083 static int stat_cache_periodic_cleanup(server *srv, time_t max_age) {
1084 	stat_cache *sc;
1085 	size_t max_ndx = 0, i;
1086 	int *keys;
1087 
1088 	sc = srv->stat_cache;
1089 
1090 	if (!sc->files) return 0;
1091 
1092 	keys = calloc(1, sizeof(int) * sc->files->size);
1093 	force_assert(NULL != keys);
1094 
1095 	stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx, max_age);
1096 
1097 	for (i = 0; i < max_ndx; i++) {
1098 		int ndx = keys[i];
1099 		splay_tree *node;
1100 
1101 		sc->files = splaytree_splay(sc->files, ndx);
1102 
1103 		node = sc->files;
1104 
1105 		if (node && (node->key == ndx)) {
1106 			stat_cache_entry_free(node->data);
1107 			sc->files = splaytree_delete(sc->files, ndx);
1108 		}
1109 	}
1110 
1111 	free(keys);
1112 
1113 	return 0;
1114 }
1115 
1116 int stat_cache_trigger_cleanup(server *srv) {
1117 	time_t max_age = 2;
1118 
1119       #ifdef HAVE_FAM_H
1120 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1121 		if (srv->cur_ts & 0x1F) return 0;
1122 		/* once every 32 seconds (0x1F == 31) */
1123 		max_age = 32;
1124 		fam_dir_periodic_cleanup(srv);
1125 		/* By doing this before stat_cache_periodic_cleanup(),
1126 		 * entries used within the next max_age secs will remain
1127 		 * monitored, instead of effectively flushing and
1128 		 * rebuilding the FAM monitoring every max_age seconds */
1129 	}
1130       #endif
1131 
1132 	stat_cache_periodic_cleanup(srv, max_age);
1133 
1134 	return 0;
1135 }
1136