xref: /lighttpd1.4/src/stat_cache.c (revision 15cdc313)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "base.h"
5 #include "log.h"
6 #include "fdevent.h"
7 #include "etag.h"
8 #include "splaytree.h"
9 
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 
19 #ifdef HAVE_ATTR_ATTRIBUTES_H
20 # include <attr/attributes.h>
21 #endif
22 
23 #ifdef HAVE_SYS_EXTATTR_H
24 # include <sys/extattr.h>
25 #endif
26 
/* no lstat() available: use stat() in its place and, unless the system
 * headers already provide S_ISLNK(), treat nothing as a symlink */
#ifndef HAVE_LSTAT
# define lstat stat
# ifndef S_ISLNK
#  define S_ISLNK(mode) (0)
# endif
#endif
33 
34 /*
35  * stat-cache
36  *
37  * - a splay-tree is used as we can use the caching effect of it
38  */
39 
/* values stored in srv->srvconf.stat_cache_engine
 * (see stat_cache_choose_engine() for the config strings) */
enum {
    STAT_CACHE_ENGINE_UNSET,   /* first enumerator (0) */
    STAT_CACHE_ENGINE_NONE,    /* "disable" */
    STAT_CACHE_ENGINE_SIMPLE,  /* "simple", also chosen for an empty string */
    STAT_CACHE_ENGINE_FAM      /* "fam" (only selectable with HAVE_FAM_H) */
};
46 
47 struct stat_cache_fam;  /* declaration */
48 
49 typedef struct stat_cache {
50 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
51 	struct stat_cache_fam *scf;
52 } stat_cache;
53 
54 
/* DJB string hash (xor variant): start at 5381, then for every byte
 * hash = (hash * 33) ^ byte, with wrap-around in 32 bits */
__attribute_pure__
static uint32_t djbhash(const char *str, const size_t len)
{
    const unsigned char * const bytes = (const unsigned char *)str;
    uint32_t h = 5381;
    for (size_t n = 0; n != len; ++n) {
        h = (h * 33u) ^ bytes[n];
    }
    return h;
}
64 
65 
/* key for the splay trees in this file: djbhash() of the string with the
 * highest bit cleared (callers in this file store the result in an int) */
__attribute_pure__
static uint32_t hashme(const char *str, const size_t len)
{
    const uint32_t high_bit = ((uint32_t)1) << 31;
    return djbhash(str, len) & ~high_bit;
}
72 
73 
74 static void * stat_cache_sptree_find(splay_tree ** const sptree,
75                                      const char * const name,
76                                      size_t len)
77 {
78     const int ndx = hashme(name, len);
79     *sptree = splaytree_splay(*sptree, ndx);
80     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
81 }
82 
83 
84 #ifdef HAVE_FAM_H
85 
86 /* monitor changes in directories using FAM
87  *
88  * This implementation employing FAM monitors directories as they are used,
89  * and maintains a reference count for cache use within stat_cache.c.
90  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
91  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
92  * within the cache are checked against the filesystem upon use if last stat()
93  * was greater than or equal to 16 seconds ago.
94  *
95  * This implementation does not monitor every directory in a tree, and therefore
96  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
97  * processing events from FAM might also lead to stale cache entries.
98  *
99  * For many websites, a large number of files are seldom, if ever, modified,
100  * and a common practice with images is to create a new file with a new name
101  * when a new version is needed, in order for client browsers and CDNs to better
102  * cache the content.  Given this, most use will see little difference in
103  * performance between server.stat-cache-engine = "fam" and "simple" (default).
104  * The default server.stat-cache-engine = "simple" calls stat() on a target once
105  * per second, and reuses that information until the next second.  For use where
106  * changes must be immediately visible, server.stat-cache-engine = "disable"
107  * should be used.
108  *
109  * When considering use of server.stat-cache-engine = "fam", there are a few
110  * additional limitations for this cache implementation using FAM.
111  * - symlinks to files located outside of the current directory do not result
112  *   in changes to that file being monitored (unless that file is in a directory
113  *   which is monitored as a result of a different request).  symlinks can be
114  *   chained and can be circular.  This implementation *does not* readlink() or
115  *   realpath() to resolve the chains to find and monitor the ultimate target
116  *   directory.  While symlinks to files located outside the current directory
117  *   are not monitored, symlinks to directories *are* monitored, though chains
118  *   of symlinks to directories do not result in monitoring of the directories
119  *   containing intermediate symlinks to the target directory.
120  * - directory rename of a directory which is not currently being monitored will
121  *   result in stale information in the cache if there is a subdirectory that is
122  *   being monitored.
123  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
124  * does re-validate the information in the cache upon use if the cache entry has
125  * not been checked in 16 seconds, so that is the upper limit for use of stale
126  * data.
127  *
128  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
129  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
130  * to the overhead of processing the additional noise generated from changes.
131  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
132  * untrusted files where a malicious user could generate an excess of change
133  * events.
134  *
135  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
136  * event is received for a directory (or symlink to a directory) which has been
137  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
138  * changes of large directory trees where there have been a large number of
139  * different files recently accessed and part of the stat_cache.
140  */
141 
142 #include <fam.h>
143 
144 typedef struct fam_dir_entry {
145 	buffer *name;
146 	int refcnt;
147 	FAMRequest req;
148 	time_t stat_ts;
149 	dev_t st_dev;
150 	ino_t st_ino;
151 	struct fam_dir_entry *fam_parent;
152 } fam_dir_entry;
153 
154 typedef struct stat_cache_fam {
155 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
156 	FAMConnection fam;
157 	fdnode *fdn;
158 	int fd;
159 } stat_cache_fam;
160 
161 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
162 {
163     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
164     force_assert(NULL != fam_dir);
165 
166     fam_dir->name = buffer_init();
167     buffer_copy_string_len(fam_dir->name, name, len);
168     fam_dir->refcnt = 0;
169 
170     return fam_dir;
171 }
172 
173 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
174 {
175     if (!fam_dir) return;
176     /*(fam_dir->parent might be invalid pointer here; ignore)*/
177     buffer_free(fam_dir->name);
178     free(fam_dir);
179 }
180 
181 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
182 {
183     fam_dir->stat_ts = 0;
184     if (fam_dir->fam_parent) {
185         --fam_dir->fam_parent->refcnt;
186         fam_dir->fam_parent = NULL;
187     }
188 }
189 
190 /*
191  * walk though splay_tree and collect contents of dir tree.
192  * remove tagged entries in a second loop
193  */
194 
195 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
196 {
197     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
198     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
199     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
200     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
201 
202     fam_dir_entry * const fam_dir = t->data;
203     if (0 == fam_dir->refcnt) {
204         fam_dir_invalidate_node(fam_dir);
205         keys[(*ndx)++] = t->key;
206     }
207 }
208 
209 static void fam_dir_periodic_cleanup(server *srv) {
210     int max_ndx, i;
211     int keys[8192]; /* 32k size on stack */
212     stat_cache_fam * const scf = srv->stat_cache->scf;
213     do {
214         if (!scf->dirs) return;
215         max_ndx = 0;
216         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
217         for (i = 0; i < max_ndx; ++i) {
218             const int ndx = keys[i];
219             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
220             if (node && node->key == ndx) {
221                 fam_dir_entry *fam_dir = node->data;
222                 scf->dirs = splaytree_delete(scf->dirs, ndx);
223                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
224                 fam_dir_entry_free(fam_dir);
225             }
226         }
227     } while (max_ndx == sizeof(keys)/sizeof(int));
228 }
229 
230 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
231 {
232     /*force_assert(t);*/
233     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
234     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
235 
236     fam_dir_entry * const fam_dir = t->data;
237     buffer *b = fam_dir->name;
238     size_t blen = buffer_string_length(b);
239     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
240         fam_dir_invalidate_node(fam_dir);
241 }
242 
243 /* declarations */
244 static void stat_cache_delete_tree(server *srv, const char *name, size_t len);
245 static void stat_cache_invalidate_dir_tree(server *srv, const char *name, size_t len);
246 
247 static void stat_cache_handle_fdevent_in(server *srv, stat_cache_fam *scf)
248 {
249     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
250         FAMEvent fe;
251         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
252 
253         /* ignore events which may have been pending for
254          * paths recently cancelled via FAMCancelMonitor() */
255         ndx = (int)(intptr_t)fe.userdata;
256         scf->dirs = splaytree_splay(scf->dirs, ndx);
257         if (!scf->dirs || scf->dirs->key != ndx) {
258             continue;
259         }
260         fam_dir_entry *fam_dir = scf->dirs->data;
261         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
262             != FAMREQUEST_GETREQNUM(&fe.fr)) {
263             continue;
264         }
265 
266         if (fe.filename[0] != '/') {
267             buffer * const n = fam_dir->name;
268             fam_dir_entry *fam_link;
269             size_t len;
270             switch(fe.code) {
271             case FAMCreated:
272                 /* file created in monitored dir modifies dir and
273                  * we should get a separate FAMChanged event for dir.
274                  * Therefore, ignore file FAMCreated event here.
275                  * Also, if FAMNoExists() is used, might get spurious
276                  * FAMCreated events as changes are made e.g. in monitored
277                  * sub-sub-sub dirs and the library discovers new (already
278                  * existing) dir entries */
279                 continue;
280             case FAMChanged:
281                 /* file changed in monitored dir does not modify dir */
282             case FAMDeleted:
283             case FAMMoved:
284                 /* file deleted or moved in monitored dir modifies dir,
285                  * but FAM provides separate notification for that */
286 
287                 /* temporarily append filename to dir in fam_dir->name to
288                  * construct path, then delete stat_cache entry (if any)*/
289                 len = buffer_string_length(n);
290                 buffer_append_string_len(n, CONST_STR_LEN("/"));
291                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
292                 /* (alternatively, could chose to stat() and update)*/
293                 stat_cache_invalidate_entry(srv, CONST_BUF_LEN(n));
294 
295                 fam_link = /*(check if might be symlink to monitored dir)*/
296                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
297                 if (fam_link && !buffer_is_equal(fam_link->name, n))
298                     fam_link = NULL;
299 
300                 buffer_string_set_length(n, len);
301 
302                 if (fam_link) {
303                     /* replaced symlink changes containing dir */
304                     stat_cache_invalidate_entry(srv, CONST_BUF_LEN(n));
305                     /* handle symlink to dir as deleted dir below */
306                     fe.code = FAMDeleted;
307                     fam_dir = fam_link;
308                     break;
309                 }
310                 continue;
311             default:
312                 continue;
313             }
314         }
315 
316         switch(fe.code) {
317         case FAMChanged:
318             stat_cache_invalidate_entry(srv, CONST_BUF_LEN(fam_dir->name));
319             break;
320         case FAMDeleted:
321         case FAMMoved:
322             stat_cache_delete_tree(srv, CONST_BUF_LEN(fam_dir->name));
323             fam_dir_invalidate_node(fam_dir);
324             if (scf->dirs)
325                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
326             fam_dir_periodic_cleanup(srv);
327             break;
328         default:
329             break;
330         }
331     }
332 }
333 
334 static handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent)
335 {
336 	stat_cache_fam *scf = srv->stat_cache->scf;
337 	UNUSED(_fce);
338 
339 	if (revent & FDEVENT_IN) {
340 		stat_cache_handle_fdevent_in(srv, scf);
341 	}
342 
343 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
344 		/* fam closed the connection */
345 		log_error_write(srv, __FILE__, __LINE__, "s",
346 				"FAM connection closed; disabling stat_cache.");
347 		/* (although effectively STAT_CACHE_ENGINE_NONE,
348 		 *  do not change here so that periodic jobs clean up memory)*/
349 		/*srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
350 		fdevent_fdnode_event_del(srv->ev, scf->fdn);
351 		fdevent_unregister(srv->ev, scf->fd);
352 		scf->fdn = NULL;
353 
354 		FAMClose(&scf->fam);
355 		scf->fd = -1;
356 	}
357 
358 	return HANDLER_GO_ON;
359 }
360 
361 static stat_cache_fam * stat_cache_init_fam(server *srv) {
362 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
363 	force_assert(scf);
364 	scf->fd = -1;
365 
366 	/* setup FAM */
367 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
368 		log_error_write(srv, __FILE__, __LINE__, "s",
369 				"could not open a fam connection, dieing.");
370 		return NULL;
371 	}
372       #ifdef HAVE_FAMNOEXISTS
373 	FAMNoExists(&scf->fam);
374       #endif
375 
376 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
377 	fdevent_setfd_cloexec(scf->fd);
378 	scf->fdn = fdevent_register(srv->ev, scf->fd, stat_cache_handle_fdevent, NULL);
379 	fdevent_fdnode_event_set(srv->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
380 
381 	return scf;
382 }
383 
384 static void stat_cache_free_fam(stat_cache_fam *scf) {
385 	if (NULL == scf) return;
386 
387 	while (scf->dirs) {
388 		/*(skip entry invalidation and FAMCancelMonitor())*/
389 		splay_tree *node = scf->dirs;
390 		fam_dir_entry_free((fam_dir_entry *)node->data);
391 		scf->dirs = splaytree_delete(scf->dirs, node->key);
392 	}
393 
394 	if (-1 != scf->fd) {
395 		/*scf->fdn already cleaned up in fdevent_free()*/
396 		FAMClose(&scf->fam);
397 		/*scf->fd = -1;*/
398 	}
399 
400 	free(scf);
401 }
402 
403 static fam_dir_entry * fam_dir_monitor(server *srv, stat_cache_fam *scf, char *fn, size_t dirlen, struct stat *st)
404 {
405     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
406     const int fn_is_dir = S_ISDIR(st->st_mode);
407     /*force_assert(0 != dirlen);*/
408     /*force_assert(fn[0] == '/');*/
409     /* consistency: ensure fn does not end in '/' unless root "/"
410      * FAM events will not end in '/', so easier to match this way */
411     if (fn[dirlen-1] == '/') --dirlen;
412     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
413     /* Note: paths are expected to be normalized before calling stat_cache,
414      * e.g. without repeated '/' */
415     if (!fn_is_dir) {
416         while (fn[--dirlen] != '/') ;
417         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
418     }
419     int dir_ndx = hashme(fn, dirlen);
420     fam_dir_entry *fam_dir = NULL;
421 
422     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
423     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
424         fam_dir = scf->dirs->data;
425         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
426             /* hash collision; preserve existing
427              * do not monitor new to avoid cache thrashing */
428             return NULL;
429         }
430         /* directory already registered */
431     }
432 
433     struct stat lst;
434     int ck_dir = fn_is_dir;
435     if (!fn_is_dir && (NULL==fam_dir || srv->cur_ts - fam_dir->stat_ts >= 16)) {
436         ck_dir = 1;
437         /*(temporarily modify fn)*/
438         fn[dirlen] = '\0';
439         if (0 != lstat(fn, &lst)) {
440             fn[dirlen] = '/';
441             return NULL;
442         }
443         if (!S_ISLNK(lst.st_mode)) {
444             st = &lst;
445         }
446         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
447             fn[dirlen] = '/';
448             return NULL;
449         }
450         fn[dirlen] = '/';
451     }
452 
453     int ck_lnk = (NULL == fam_dir);
454     if (ck_dir && NULL != fam_dir) {
455         /* check stat() matches device and inode, just in case an external event
456          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
457         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
458             ck_lnk = 1;
459             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
460              * fam_dir is not NULL and so splaytree_insert not called below)*/
461             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
462             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
463                 stat_cache_update_entry(srv, fn, dirlen, st, NULL);
464             /*(must not delete tree since caller is holding a valid node)*/
465             stat_cache_invalidate_dir_tree(srv, fn, dirlen);
466             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
467                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
468                                             &fam_dir->req,
469                                             (void *)(intptr_t)dir_ndx)) {
470                 fam_dir->stat_ts = 0; /* invalidate */
471                 return NULL;
472             }
473             fam_dir->st_dev = st->st_dev;
474             fam_dir->st_ino = st->st_ino;
475         }
476         fam_dir->stat_ts = srv->cur_ts;
477     }
478 
479     if (NULL == fam_dir) {
480         fam_dir = fam_dir_entry_init(fn, dirlen);
481 
482         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
483                                      (void *)(intptr_t)dir_ndx)) {
484             log_error_write(srv, __FILE__, __LINE__, "sbsss",
485                     "monitoring dir failed:",
486                     fam_dir->name,
487                     "file:", fn,
488                     FamErrlist[FAMErrno]);
489             fam_dir_entry_free(fam_dir);
490             return NULL;
491         }
492 
493         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
494         fam_dir->stat_ts= srv->cur_ts;
495         fam_dir->st_dev = st->st_dev;
496         fam_dir->st_ino = st->st_ino;
497     }
498 
499     if (ck_lnk) {
500         if (fn_is_dir) {
501             /*(temporarily modify fn)*/
502             char e = fn[dirlen];
503             fn[dirlen] = '\0';
504             if (0 != lstat(fn, &lst)) {
505                 fn[dirlen] = e;
506                 return NULL;
507             }
508             fn[dirlen] = e;
509         }
510         if (fam_dir->fam_parent) {
511             --fam_dir->fam_parent->refcnt;
512             fam_dir->fam_parent = NULL;
513         }
514         if (S_ISLNK(lst.st_mode)) {
515             fam_dir->fam_parent = fam_dir_monitor(srv, scf, fn, dirlen, &lst);
516         }
517     }
518 
519     ++fam_dir->refcnt;
520     return fam_dir;
521 }
522 
523 #endif
524 
525 
526 stat_cache *stat_cache_init(server *srv) {
527 	stat_cache *sc = NULL;
528 	UNUSED(srv);
529 
530 	sc = calloc(1, sizeof(*sc));
531 	force_assert(NULL != sc);
532 
533 #ifdef HAVE_FAM_H
534 	if (STAT_CACHE_ENGINE_FAM == srv->srvconf.stat_cache_engine) {
535 		sc->scf = stat_cache_init_fam(srv);
536 		if (NULL == sc->scf) {
537 			free(sc);
538 			return NULL;
539 		}
540 	}
541 #endif
542 
543 	return sc;
544 }
545 
546 static stat_cache_entry * stat_cache_entry_init(void) {
547 	stat_cache_entry *sce = NULL;
548 
549 	sce = calloc(1, sizeof(*sce));
550 	force_assert(NULL != sce);
551 
552 	sce->name = buffer_init();
553 	sce->etag = buffer_init();
554 	sce->content_type = buffer_init();
555 
556 	return sce;
557 }
558 
559 static void stat_cache_entry_free(void *data) {
560 	stat_cache_entry *sce = data;
561 	if (!sce) return;
562 
563       #ifdef HAVE_FAM_H
564 	/*(decrement refcnt only;
565 	 * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
566 	if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
567       #endif
568 
569 	buffer_free(sce->etag);
570 	buffer_free(sce->name);
571 	buffer_free(sce->content_type);
572 
573 	free(sce);
574 }
575 
576 void stat_cache_free(stat_cache *sc) {
577 	while (sc->files) {
578 		splay_tree *node = sc->files;
579 		stat_cache_entry_free(node->data);
580 		sc->files = splaytree_delete(sc->files, node->key);
581 	}
582 
583 #ifdef HAVE_FAM_H
584 	stat_cache_free_fam(sc->scf);
585 #endif
586 	free(sc);
587 }
588 
589 int stat_cache_choose_engine (server *srv, const buffer *stat_cache_string) {
590 	if (buffer_string_is_empty(stat_cache_string)) {
591 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
592 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("simple"))) {
593 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
594 #ifdef HAVE_FAM_H
595 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("fam"))) {
596 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
597 #endif
598 	} else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("disable"))) {
599 		srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
600 	} else {
601 		log_error_write(srv, __FILE__, __LINE__, "sb",
602 				"server.stat-cache-engine can be one of \"disable\", \"simple\","
603 #ifdef HAVE_FAM_H
604 				" \"fam\","
605 #endif
606 				" but not:", stat_cache_string);
607 		return -1;
608 	}
609 	return 0;
610 }
611 
#if defined(HAVE_XATTR)
/* Read extended attribute xattrname of file name into buf using attr_get().
 * Returns the attr_get() result; buf length is committed only when that
 * result is 0. */
static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
    buffer_string_prepare_copy(buf, 1023);

    /* in: room available in buf (one byte held back); out: length read */
    int attrlen = buf->size - 1;
    const int rc = attr_get(name, xattrname, buf->ptr, &attrlen, 0);
    if (0 == rc) buffer_commit(buf, attrlen);
    return rc;
}
#elif defined(HAVE_EXTATTR)
/* Read extended attribute xattrname (user namespace) of file name into buf
 * using extattr_get_file().  Returns 0 on success, -1 on failure. */
static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
    buffer_string_prepare_copy(buf, 1023);

    const ssize_t attrlen =
      extattr_get_file(name, EXTATTR_NAMESPACE_USER, xattrname,
                       buf->ptr, buf->size - 1);
    if (-1 == attrlen) return -1;

    /* terminate the value and account for the terminator in used */
    buf->used = attrlen + 1;
    buf->ptr[attrlen] = '\0';
    return 0;
}
#endif
638 
639 const buffer * stat_cache_mimetype_by_ext(const connection *con, const char *name, size_t nlen)
640 {
641     const char *end = name + nlen; /*(end of string)*/
642     const size_t used = con->conf.mimetypes->used;
643     if (used < 16) {
644         for (size_t i = 0; i < used; ++i) {
645             /* suffix match */
646             const data_string *ds = (data_string *)con->conf.mimetypes->data[i];
647             const size_t klen = buffer_string_length(ds->key);
648             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key->ptr, klen))
649                 return ds->value;
650         }
651     }
652     else {
653         const char *s;
654         const data_string *ds;
655         if (nlen) {
656             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
657             if (*s == '/') ++s;
658         }
659         else {
660             s = name;
661         }
662         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
663         ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
664         if (NULL != ds) return ds->value;
665         while (++s < end) {
666             while (*s != '.' && ++s != end) ;
667             if (s == end) break;
668             /* search ".ext" then "ext" */
669             ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
670             if (NULL != ds) return ds->value;
671             /* repeat search without leading '.' to handle situation where
672              * admin configured mimetype.assign keys without leading '.' */
673             if (++s < end) {
674                 if (*s == '.') { --s; continue; }
675                 ds = (data_string *)array_get_element_klen(con->conf.mimetypes, s, end - s);
676                 if (NULL != ds) return ds->value;
677             }
678         }
679         /* search for ""; catchall */
680         ds = (data_string *)array_get_element(con->conf.mimetypes, "");
681         if (NULL != ds) return ds->value;
682     }
683 
684     return NULL;
685 }
686 
687 const buffer * stat_cache_content_type_get(server *srv, connection *con, const buffer *name, stat_cache_entry *sce)
688 {
689     /*(invalid caching if user config has multiple, different
690      * con->conf.mimetypes for same extension (not expected))*/
691     if (!buffer_string_is_empty(sce->content_type)) return sce->content_type;
692 
693     if (S_ISREG(sce->st.st_mode)) {
694         /* determine mimetype */
695         buffer_clear(sce->content_type);
696       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
697         if (con->conf.use_xattr) {
698             stat_cache_attr_get(sce->content_type, name->ptr, srv->srvconf.xattr_name->ptr);
699         }
700       #else
701         UNUSED(srv);
702       #endif
703         /* xattr did not set a content-type. ask the config */
704         if (buffer_string_is_empty(sce->content_type)) {
705             const buffer *type = stat_cache_mimetype_by_ext(con, CONST_BUF_LEN(name));
706             if (NULL != type) {
707                 buffer_copy_buffer(sce->content_type, type);
708             }
709         }
710         return sce->content_type;
711     }
712 
713     return NULL;
714 }
715 
716 const buffer * stat_cache_etag_get(stat_cache_entry *sce, etag_flags_t flags) {
717     /*(invalid caching if user config has multiple, different con->etag_flags
718      * for same path (not expected, since etag flags should be by filesystem))*/
719     if (!buffer_string_is_empty(sce->etag)) return sce->etag;
720 
721     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
722         etag_create(sce->etag, &sce->st, flags);
723         return sce->etag;
724     }
725 
726     return NULL;
727 }
728 
729 void stat_cache_update_entry(server *srv, const char *name, size_t len,
730                              struct stat *st, buffer *etagb)
731 {
732     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
733     force_assert(0 != len);
734     if (name[len-1] == '/') { if (0 == --len) len = 1; }
735     splay_tree **sptree = &srv->stat_cache->files;
736     stat_cache_entry *sce =
737       stat_cache_sptree_find(sptree, name, len);
738     if (sce && buffer_is_equal_string(sce->name, name, len)) {
739         sce->stat_ts = srv->cur_ts;
740         sce->st = *st; /* etagb might be NULL to clear etag (invalidate) */
741         buffer_copy_string_len(sce->etag, CONST_BUF_LEN(etagb));
742       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
743         buffer_clear(sce->content_type);
744       #endif
745     }
746 }
747 
748 void stat_cache_delete_entry(server *srv, const char *name, size_t len)
749 {
750     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
751     force_assert(0 != len);
752     if (name[len-1] == '/') { if (0 == --len) len = 1; }
753     splay_tree **sptree = &srv->stat_cache->files;
754     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
755     if (sce && buffer_is_equal_string(sce->name, name, len)) {
756         stat_cache_entry_free(sce);
757         *sptree = splaytree_delete(*sptree, (*sptree)->key);
758     }
759 }
760 
761 void stat_cache_invalidate_entry(server *srv, const char *name, size_t len)
762 {
763     splay_tree **sptree = &srv->stat_cache->files;
764     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
765     if (sce && buffer_is_equal_string(sce->name, name, len)) {
766         sce->stat_ts = 0;
767       #ifdef HAVE_FAM_H
768         if (sce->fam_dir != NULL) {
769             --((fam_dir_entry *)sce->fam_dir)->refcnt;
770             sce->fam_dir = NULL;
771         }
772       #endif
773     }
774 }
775 
776 #ifdef HAVE_FAM_H
777 
778 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
779                                                 const char *name, size_t len)
780 {
781     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
782     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
783 
784     buffer *b = ((stat_cache_entry *)t->data)->name;
785     size_t blen = buffer_string_length(b);
786     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
787         stat_cache_entry *sce = t->data;
788         sce->stat_ts = 0;
789         if (sce->fam_dir != NULL) {
790             --((fam_dir_entry *)sce->fam_dir)->refcnt;
791             sce->fam_dir = NULL;
792         }
793     }
794 }
795 
796 static void stat_cache_invalidate_dir_tree(server *srv,
797                                            const char *name, size_t len)
798 {
799     splay_tree *sptree = srv->stat_cache->files;
800     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
801 }
802 
803 #endif
804 
805 /*
806  * walk though splay_tree and collect contents of dir tree.
807  * remove tagged entries in a second loop
808  */
809 
810 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
811                                     int *keys, int *ndx)
812 {
813     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
814     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
815     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
816     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
817 
818     buffer *b = ((stat_cache_entry *)t->data)->name;
819     size_t blen = buffer_string_length(b);
820     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
821         keys[(*ndx)++] = t->key;
822 }
823 
824 static void stat_cache_prune_dir_tree(stat_cache * const sc,
825                                       const char *name, size_t len)
826 {
827     int max_ndx, i;
828     int keys[8192]; /* 32k size on stack */
829     do {
830         if (!sc->files) return;
831         max_ndx = 0;
832         stat_cache_tag_dir_tree(sc->files, name, len, keys, &max_ndx);
833         for (i = 0; i < max_ndx; ++i) {
834             const int ndx = keys[i];
835             splay_tree *node = sc->files = splaytree_splay(sc->files, ndx);
836             if (node && node->key == ndx) {
837                 stat_cache_entry_free(node->data);
838                 sc->files = splaytree_delete(sc->files, ndx);
839             }
840         }
841     } while (max_ndx == sizeof(keys)/sizeof(int));
842 }
843 
844 static void stat_cache_delete_tree(server *srv, const char *name, size_t len)
845 {
846     stat_cache_delete_entry(srv, name, len);
847     stat_cache_prune_dir_tree(srv->stat_cache, name, len);
848 }
849 
850 void stat_cache_delete_dir(server *srv, const char *name, size_t len)
851 {
852     force_assert(0 != len);
853     if (name[len-1] == '/') { if (0 == --len) len = 1; }
854     stat_cache_delete_tree(srv, name, len);
855   #ifdef HAVE_FAM_H
856     if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
857         splay_tree **sptree = &srv->stat_cache->scf->dirs;
858         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
859         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
860             fam_dir_invalidate_node(fam_dir);
861         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
862         fam_dir_periodic_cleanup(srv);
863     }
864   #endif
865 }
866 
/***
 * look up the stat-cache entry for a path; stat() the path and store the
 * result when there is no usable cached entry
 *
 * returns:
 *  - HANDLER_GO_ON on success (*ret_sce points to the cache entry)
 *  - HANDLER_ERROR on stat() failed -> see errno for problem
 */
875 
876 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
877 	stat_cache_entry *sce = NULL;
878 	stat_cache *sc;
879 	struct stat st;
880 	int file_ndx;
881 	UNUSED(con);
882 
883 	*ret_sce = NULL;
884 
885 	/* consistency: ensure lookup name does not end in '/' unless root "/"
886 	 * (but use full path given with stat(), even with trailing '/') */
887 	int final_slash = 0;
888 	size_t len = buffer_string_length(name);
889 	force_assert(0 != len);
890 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
891 	/* Note: paths are expected to be normalized before calling stat_cache,
892 	 * e.g. without repeated '/' */
893 
894 	if (name->ptr[0] != '/') return HANDLER_ERROR;
895 
896 	/*
897 	 * check if the directory for this file has changed
898 	 */
899 
900 	sc = srv->stat_cache;
901 
902 	file_ndx = hashme(name->ptr, len);
903 	sc->files = splaytree_splay(sc->files, file_ndx);
904 
905 	if (sc->files && (sc->files->key == file_ndx)) {
906 		/* we have seen this file already and
907 		 * don't stat() it again in the same second */
908 
909 		sce = sc->files->data;
910 
911 		/* check if the name is the same, we might have a collision */
912 
913 		if (buffer_is_equal_string(sce->name, name->ptr, len)) {
914 			if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
915 				if (sce->stat_ts == srv->cur_ts) {
916 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
917 						errno = ENOTDIR;
918 						return HANDLER_ERROR;
919 					}
920 					*ret_sce = sce;
921 					return HANDLER_GO_ON;
922 				}
923 			}
924 		      #ifdef HAVE_FAM_H
925 			else if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM
926 				 && sce->fam_dir) { /* entry is in monitored dir */
927 				/* re-stat() periodically, even if monitoring for changes
928 				 * (due to limitations in stat_cache.c use of FAM)
929 				 * (gaps due to not continually monitoring an entire tree) */
930 				if (srv->cur_ts - sce->stat_ts < 16) {
931 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
932 						errno = ENOTDIR;
933 						return HANDLER_ERROR;
934 					}
935 					*ret_sce = sce;
936 					return HANDLER_GO_ON;
937 				}
938 			}
939 		      #endif
940 		} else {
941 			/* collision, forget about the entry */
942 			sce = NULL;
943 		}
944 	}
945 
946 	if (-1 == stat(name->ptr, &st)) {
947 		return HANDLER_ERROR;
948 	}
949 
950 	if (S_ISREG(st.st_mode)) {
951 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
952 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
953 			errno = ENOTDIR;
954 			return HANDLER_ERROR;
955 		}
956 	}
957 
958 	if (NULL == sce) {
959 
960 		sce = stat_cache_entry_init();
961 		buffer_copy_string_len(sce->name, name->ptr, len);
962 
963 		/* already splayed file_ndx */
964 		if ((NULL != sc->files) && (sc->files->key == file_ndx)) {
965 			/* hash collision: replace old entry */
966 			stat_cache_entry_free(sc->files->data);
967 			sc->files->data = sce;
968 		} else {
969 			sc->files = splaytree_insert(sc->files, file_ndx, sce);
970 		}
971 
972 	} else {
973 
974 		buffer_clear(sce->etag);
975 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
976 		buffer_clear(sce->content_type);
977 	      #endif
978 
979 	}
980 
981 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
982 
983 #ifdef HAVE_FAM_H
984 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
985 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
986 		sce->fam_dir =
987 		  fam_dir_monitor(srv, sc->scf, CONST_BUF_LEN(name), &st);
988 	      #if 0 /*(performed below)*/
989 		if (NULL != sce->fam_dir) {
990 			/*(may have been invalidated by dir change)*/
991 			sce->stat_ts = srv->cur_ts;
992 		}
993 	      #endif
994 	}
995 #endif
996 
997 	sce->stat_ts = srv->cur_ts;
998 	*ret_sce = sce;
999 
1000 	return HANDLER_GO_ON;
1001 }
1002 
1003 int stat_cache_path_contains_symlink(server *srv, buffer *name) {
1004     /* caller should check for symlinks only if we should block symlinks. */
1005 
1006     /* catch the obvious symlinks
1007      *
1008      * this is not a secure check as we still have a race-condition between
1009      * the stat() and the open. We can only solve this by
1010      * 1. open() the file
1011      * 2. fstat() the fd
1012      *
1013      * and keeping the file open for the rest of the time. But this can
1014      * only be done at network level.
1015      * */
1016 
1017   #ifdef HAVE_LSTAT
1018     /* we assume "/" can not be symlink,
1019      * so skip the symlink stuff if path is "/" */
1020     size_t len = buffer_string_length(name);
1021     force_assert(0 != len);
1022     force_assert(name->ptr[0] == '/');
1023     if (1 == len) return 0;
1024    #ifndef PATH_MAX
1025    #define PATH_MAX 4096
1026    #endif
1027     if (len >= PATH_MAX) return -1;
1028 
1029     char buf[PATH_MAX];
1030     memcpy(buf, name->ptr, len);
1031     char *s_cur = buf+len;
1032     do {
1033         *s_cur = '\0';
1034         struct stat st;
1035         if (0 == lstat(buf, &st)) {
1036             if (S_ISLNK(st.st_mode)) return 1;
1037         }
1038         else {
1039             log_error_write(srv, __FILE__, __LINE__, "sss",
1040                             "lstat failed for:", buf, strerror(errno));
1041             return -1;
1042         }
1043     } while ((s_cur = strrchr(buf, '/')) != buf);
1044   #endif
1045 
1046     return 0;
1047 }
1048 
1049 int stat_cache_open_rdonly_fstat (buffer *name, struct stat *st, int symlinks) {
1050 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1051 	 * not any intermediate symlinks along the path)*/
1052 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1053 	if (fd >= 0) {
1054 		if (0 == fstat(fd, st)) {
1055 			return fd;
1056 		} else {
1057 			close(fd);
1058 		}
1059 	}
1060 	return -1;
1061 }
1062 
/**
 * remove entries from the cache which have not been stat()ed for
 * more than max_age seconds (2 seconds, or 32 with the FAM engine)
 *
 *
 * walk through the stat-cache, collect the ids which are too old
 * and remove them in a second loop
 */
1071 
1072 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx, time_t max_age) {
1073 	stat_cache_entry *sce;
1074 
1075 	if (!t) return 0;
1076 
1077 	stat_cache_tag_old_entries(srv, t->left, keys, ndx, max_age);
1078 	stat_cache_tag_old_entries(srv, t->right, keys, ndx, max_age);
1079 
1080 	sce = t->data;
1081 
1082 	if (srv->cur_ts - sce->stat_ts > max_age) {
1083 		keys[(*ndx)++] = t->key;
1084 	}
1085 
1086 	return 0;
1087 }
1088 
1089 static int stat_cache_periodic_cleanup(server *srv, time_t max_age) {
1090 	stat_cache *sc;
1091 	size_t max_ndx = 0, i;
1092 	int *keys;
1093 
1094 	sc = srv->stat_cache;
1095 
1096 	if (!sc->files) return 0;
1097 
1098 	keys = calloc(1, sizeof(int) * sc->files->size);
1099 	force_assert(NULL != keys);
1100 
1101 	stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx, max_age);
1102 
1103 	for (i = 0; i < max_ndx; i++) {
1104 		int ndx = keys[i];
1105 		splay_tree *node;
1106 
1107 		sc->files = splaytree_splay(sc->files, ndx);
1108 
1109 		node = sc->files;
1110 
1111 		if (node && (node->key == ndx)) {
1112 			stat_cache_entry_free(node->data);
1113 			sc->files = splaytree_delete(sc->files, ndx);
1114 		}
1115 	}
1116 
1117 	free(keys);
1118 
1119 	return 0;
1120 }
1121 
1122 int stat_cache_trigger_cleanup(server *srv) {
1123 	time_t max_age = 2;
1124 
1125       #ifdef HAVE_FAM_H
1126 	if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1127 		if (srv->cur_ts & 0x1F) return 0;
1128 		/* once every 32 seconds (0x1F == 31) */
1129 		max_age = 32;
1130 		fam_dir_periodic_cleanup(srv);
1131 		/* By doing this before stat_cache_periodic_cleanup(),
1132 		 * entries used within the next max_age secs will remain
1133 		 * monitored, instead of effectively flushing and
1134 		 * rebuilding the FAM monitoring every max_age seconds */
1135 	}
1136       #endif
1137 
1138 	stat_cache_periodic_cleanup(srv, max_age);
1139 
1140 	return 0;
1141 }
1142