xref: /lighttpd1.4/src/stat_cache.c (revision c752d469)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "etag.h"
7 #include "splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #ifdef HAVE_ATTR_ATTRIBUTES_H
19 # include <attr/attributes.h>
20 #endif
21 
22 #ifdef HAVE_SYS_EXTATTR_H
23 # include <sys/extattr.h>
24 #endif
25 
26 #ifndef HAVE_LSTAT
27 #define lstat stat
28 #ifndef S_ISLNK
29 #define S_ISLNK(mode) (0)
30 #endif
31 #endif
32 
33 /*
34  * stat-cache
35  *
36  * - a splay-tree is used as we can use the caching effect of it
37  */
38 
39 enum {
40   STAT_CACHE_ENGINE_SIMPLE, /*(default)*/
41   STAT_CACHE_ENGINE_NONE,
42   STAT_CACHE_ENGINE_FAM
43 };
44 
45 struct stat_cache_fam;  /* declaration */
46 
47 typedef struct stat_cache {
48 	int stat_cache_engine;
49 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
50 	struct stat_cache_fam *scf;
51 } stat_cache;
52 
53 static stat_cache sc;
54 
55 
56 /* the famous DJB hash function for strings */
57 __attribute_pure__
58 static uint32_t djbhash(const char *str, const size_t len)
59 {
60     const unsigned char * const s = (const unsigned char *)str;
61     uint32_t hash = 5381;
62     for (size_t i = 0; i < len; ++i) hash = ((hash << 5) + hash) ^ s[i];
63     return hash;
64 }
65 
66 
67 __attribute_pure__
68 static uint32_t hashme(const char *str, const size_t len)
69 {
70     /* strip highest bit of hash value for splaytree */
71     return djbhash(str,len) & ~(((uint32_t)1) << 31);
72 }
73 
74 
75 static void * stat_cache_sptree_find(splay_tree ** const sptree,
76                                      const char * const name,
77                                      size_t len)
78 {
79     const int ndx = hashme(name, len);
80     *sptree = splaytree_splay(*sptree, ndx);
81     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
82 }
83 
84 
85 #ifdef HAVE_FAM_H
86 
87 /* monitor changes in directories using FAM
88  *
89  * This implementation employing FAM monitors directories as they are used,
90  * and maintains a reference count for cache use within stat_cache.c.
91  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
92  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
93  * within the cache are checked against the filesystem upon use if last stat()
94  * was greater than or equal to 16 seconds ago.
95  *
96  * This implementation does not monitor every directory in a tree, and therefore
97  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
98  * processing events from FAM might also lead to stale cache entries.
99  *
100  * For many websites, a large number of files are seldom, if ever, modified,
101  * and a common practice with images is to create a new file with a new name
102  * when a new version is needed, in order for client browsers and CDNs to better
103  * cache the content.  Given this, most use will see little difference in
104  * performance between server.stat-cache-engine = "fam" and "simple" (default).
105  * The default server.stat-cache-engine = "simple" calls stat() on a target once
106  * per second, and reuses that information until the next second.  For use where
107  * changes must be immediately visible, server.stat-cache-engine = "disable"
108  * should be used.
109  *
110  * When considering use of server.stat-cache-engine = "fam", there are a few
111  * additional limitations for this cache implementation using FAM.
112  * - symlinks to files located outside of the current directory do not result
113  *   in changes to that file being monitored (unless that file is in a directory
114  *   which is monitored as a result of a different request).  symlinks can be
115  *   chained and can be circular.  This implementation *does not* readlink() or
116  *   realpath() to resolve the chains to find and monitor the ultimate target
117  *   directory.  While symlinks to files located outside the current directory
118  *   are not monitored, symlinks to directories *are* monitored, though chains
119  *   of symlinks to directories do not result in monitoring of the directories
120  *   containing intermediate symlinks to the target directory.
121  * - directory rename of a directory which is not currently being monitored will
122  *   result in stale information in the cache if there is a subdirectory that is
123  *   being monitored.
124  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
125  * does re-validate the information in the cache upon use if the cache entry has
126  * not been checked in 16 seconds, so that is the upper limit for use of stale
127  * data.
128  *
129  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
130  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
131  * to the overhead of processing the additional noise generated from changes.
132  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
133  * untrusted files where a malicious user could generate an excess of change
134  * events.
135  *
136  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
137  * event is received for a directory (or symlink to a directory) which has been
138  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
139  * changes of large directories trees where there have been a large number of
140  * different files recently accessed and part of the stat_cache.
141  */
142 
143 #include <fam.h>
144 
145 typedef struct fam_dir_entry {
146 	buffer *name;
147 	int refcnt;
148 	FAMRequest req;
149 	time_t stat_ts;
150 	dev_t st_dev;
151 	ino_t st_ino;
152 	struct fam_dir_entry *fam_parent;
153 } fam_dir_entry;
154 
155 typedef struct stat_cache_fam {
156 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
157 	FAMConnection fam;
158 	log_error_st *errh;
159 	fdevents *ev;
160 	fdnode *fdn;
161 	int fd;
162 } stat_cache_fam;
163 
164 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
165 {
166     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
167     force_assert(NULL != fam_dir);
168 
169     fam_dir->name = buffer_init();
170     buffer_copy_string_len(fam_dir->name, name, len);
171     fam_dir->refcnt = 0;
172 
173     return fam_dir;
174 }
175 
176 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
177 {
178     if (!fam_dir) return;
179     /*(fam_dir->parent might be invalid pointer here; ignore)*/
180     buffer_free(fam_dir->name);
181     free(fam_dir);
182 }
183 
184 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
185 {
186     fam_dir->stat_ts = 0;
187     if (fam_dir->fam_parent) {
188         --fam_dir->fam_parent->refcnt;
189         fam_dir->fam_parent = NULL;
190     }
191 }
192 
193 /*
194  * walk though splay_tree and collect contents of dir tree.
195  * remove tagged entries in a second loop
196  */
197 
198 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
199 {
200     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
201     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
202     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
203     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
204 
205     fam_dir_entry * const fam_dir = t->data;
206     if (0 == fam_dir->refcnt) {
207         fam_dir_invalidate_node(fam_dir);
208         keys[(*ndx)++] = t->key;
209     }
210 }
211 
212 static void fam_dir_periodic_cleanup() {
213     int max_ndx, i;
214     int keys[8192]; /* 32k size on stack */
215     stat_cache_fam * const scf = sc.scf;
216     do {
217         if (!scf->dirs) return;
218         max_ndx = 0;
219         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
220         for (i = 0; i < max_ndx; ++i) {
221             const int ndx = keys[i];
222             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
223             if (node && node->key == ndx) {
224                 fam_dir_entry *fam_dir = node->data;
225                 scf->dirs = splaytree_delete(scf->dirs, ndx);
226                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
227                 fam_dir_entry_free(fam_dir);
228             }
229         }
230     } while (max_ndx == sizeof(keys)/sizeof(int));
231 }
232 
233 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
234 {
235     /*force_assert(t);*/
236     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
237     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
238 
239     fam_dir_entry * const fam_dir = t->data;
240     buffer *b = fam_dir->name;
241     size_t blen = buffer_string_length(b);
242     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
243         fam_dir_invalidate_node(fam_dir);
244 }
245 
246 /* declarations */
247 static void stat_cache_delete_tree(const char *name, size_t len);
248 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
249 
250 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
251 {
252     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
253         FAMEvent fe;
254         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
255 
256         /* ignore events which may have been pending for
257          * paths recently cancelled via FAMCancelMonitor() */
258         ndx = (int)(intptr_t)fe.userdata;
259         scf->dirs = splaytree_splay(scf->dirs, ndx);
260         if (!scf->dirs || scf->dirs->key != ndx) {
261             continue;
262         }
263         fam_dir_entry *fam_dir = scf->dirs->data;
264         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
265             != FAMREQUEST_GETREQNUM(&fe.fr)) {
266             continue;
267         }
268 
269         if (fe.filename[0] != '/') {
270             buffer * const n = fam_dir->name;
271             fam_dir_entry *fam_link;
272             size_t len;
273             switch(fe.code) {
274             case FAMCreated:
275                 /* file created in monitored dir modifies dir and
276                  * we should get a separate FAMChanged event for dir.
277                  * Therefore, ignore file FAMCreated event here.
278                  * Also, if FAMNoExists() is used, might get spurious
279                  * FAMCreated events as changes are made e.g. in monitored
280                  * sub-sub-sub dirs and the library discovers new (already
281                  * existing) dir entries */
282                 continue;
283             case FAMChanged:
284                 /* file changed in monitored dir does not modify dir */
285             case FAMDeleted:
286             case FAMMoved:
287                 /* file deleted or moved in monitored dir modifies dir,
288                  * but FAM provides separate notification for that */
289 
290                 /* temporarily append filename to dir in fam_dir->name to
291                  * construct path, then delete stat_cache entry (if any)*/
292                 len = buffer_string_length(n);
293                 buffer_append_string_len(n, CONST_STR_LEN("/"));
294                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
295                 /* (alternatively, could chose to stat() and update)*/
296                 stat_cache_invalidate_entry(CONST_BUF_LEN(n));
297 
298                 fam_link = /*(check if might be symlink to monitored dir)*/
299                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
300                 if (fam_link && !buffer_is_equal(fam_link->name, n))
301                     fam_link = NULL;
302 
303                 buffer_string_set_length(n, len);
304 
305                 if (fam_link) {
306                     /* replaced symlink changes containing dir */
307                     stat_cache_invalidate_entry(CONST_BUF_LEN(n));
308                     /* handle symlink to dir as deleted dir below */
309                     fe.code = FAMDeleted;
310                     fam_dir = fam_link;
311                     break;
312                 }
313                 continue;
314             default:
315                 continue;
316             }
317         }
318 
319         switch(fe.code) {
320         case FAMChanged:
321             stat_cache_invalidate_entry(CONST_BUF_LEN(fam_dir->name));
322             break;
323         case FAMDeleted:
324         case FAMMoved:
325             stat_cache_delete_tree(CONST_BUF_LEN(fam_dir->name));
326             fam_dir_invalidate_node(fam_dir);
327             if (scf->dirs)
328                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
329             fam_dir_periodic_cleanup();
330             break;
331         default:
332             break;
333         }
334     }
335 }
336 
337 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
338 {
339 	stat_cache_fam * const scf = ctx; /* sc.scf */
340 
341 	if (revent & FDEVENT_IN) {
342 		stat_cache_handle_fdevent_in(scf);
343 	}
344 
345 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
346 		/* fam closed the connection */
347 		log_error(scf->errh, __FILE__, __LINE__,
348 		  "FAM connection closed; disabling stat_cache.");
349 		/* (although effectively STAT_CACHE_ENGINE_NONE,
350 		 *  do not change here so that periodic jobs clean up memory)*/
351 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
352 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
353 		fdevent_unregister(scf->ev, scf->fd);
354 		scf->fdn = NULL;
355 
356 		FAMClose(&scf->fam);
357 		scf->fd = -1;
358 	}
359 
360 	return HANDLER_GO_ON;
361 }
362 
363 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
364 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
365 	force_assert(scf);
366 	scf->fd = -1;
367 	scf->ev = ev;
368 	scf->errh = errh;
369 
370 	/* setup FAM */
371 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
372 		log_error(errh, __FILE__, __LINE__,
373 		  "could not open a fam connection, dying.");
374 		return NULL;
375 	}
376       #ifdef HAVE_FAMNOEXISTS
377 	FAMNoExists(&scf->fam);
378       #endif
379 
380 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
381 	fdevent_setfd_cloexec(scf->fd);
382 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
383 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
384 
385 	return scf;
386 }
387 
388 static void stat_cache_free_fam(stat_cache_fam *scf) {
389 	if (NULL == scf) return;
390 
391 	while (scf->dirs) {
392 		/*(skip entry invalidation and FAMCancelMonitor())*/
393 		splay_tree *node = scf->dirs;
394 		fam_dir_entry_free((fam_dir_entry *)node->data);
395 		scf->dirs = splaytree_delete(scf->dirs, node->key);
396 	}
397 
398 	if (-1 != scf->fd) {
399 		/*scf->fdn already cleaned up in fdevent_free()*/
400 		FAMClose(&scf->fam);
401 		/*scf->fd = -1;*/
402 	}
403 
404 	free(scf);
405 }
406 
407 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, size_t dirlen, struct stat *st)
408 {
409     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
410     const int fn_is_dir = S_ISDIR(st->st_mode);
411     /*force_assert(0 != dirlen);*/
412     /*force_assert(fn[0] == '/');*/
413     /* consistency: ensure fn does not end in '/' unless root "/"
414      * FAM events will not end in '/', so easier to match this way */
415     if (fn[dirlen-1] == '/') --dirlen;
416     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
417     /* Note: paths are expected to be normalized before calling stat_cache,
418      * e.g. without repeated '/' */
419     if (!fn_is_dir) {
420         while (fn[--dirlen] != '/') ;
421         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
422     }
423     int dir_ndx = hashme(fn, dirlen);
424     fam_dir_entry *fam_dir = NULL;
425 
426     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
427     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
428         fam_dir = scf->dirs->data;
429         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
430             /* hash collision; preserve existing
431              * do not monitor new to avoid cache thrashing */
432             return NULL;
433         }
434         /* directory already registered */
435     }
436 
437     const time_t cur_ts = log_epoch_secs;
438     struct stat lst;
439     int ck_dir = fn_is_dir;
440     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
441         ck_dir = 1;
442         /*(temporarily modify fn)*/
443         fn[dirlen] = '\0';
444         if (0 != lstat(fn, &lst)) {
445             fn[dirlen] = '/';
446             return NULL;
447         }
448         if (!S_ISLNK(lst.st_mode)) {
449             st = &lst;
450         }
451         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
452             fn[dirlen] = '/';
453             return NULL;
454         }
455         fn[dirlen] = '/';
456     }
457 
458     int ck_lnk = (NULL == fam_dir);
459     if (ck_dir && NULL != fam_dir) {
460         /* check stat() matches device and inode, just in case an external event
461          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
462         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
463             ck_lnk = 1;
464             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
465              * fam_dir is not NULL and so splaytree_insert not called below)*/
466             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
467             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
468                 stat_cache_update_entry(fn, dirlen, st, NULL);
469             /*(must not delete tree since caller is holding a valid node)*/
470             stat_cache_invalidate_dir_tree(fn, dirlen);
471             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
472                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
473                                             &fam_dir->req,
474                                             (void *)(intptr_t)dir_ndx)) {
475                 fam_dir->stat_ts = 0; /* invalidate */
476                 return NULL;
477             }
478             fam_dir->st_dev = st->st_dev;
479             fam_dir->st_ino = st->st_ino;
480         }
481         fam_dir->stat_ts = cur_ts;
482     }
483 
484     if (NULL == fam_dir) {
485         fam_dir = fam_dir_entry_init(fn, dirlen);
486 
487         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
488                                      (void *)(intptr_t)dir_ndx)) {
489             log_error(scf->errh, __FILE__, __LINE__,
490               "monitoring dir failed: %s file: %s %s",
491               fam_dir->name->ptr, fn, FamErrlist[FAMErrno]);
492             fam_dir_entry_free(fam_dir);
493             return NULL;
494         }
495 
496         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
497         fam_dir->stat_ts= cur_ts;
498         fam_dir->st_dev = st->st_dev;
499         fam_dir->st_ino = st->st_ino;
500     }
501 
502     if (ck_lnk) {
503         if (fn_is_dir) {
504             /*(temporarily modify fn)*/
505             char e = fn[dirlen];
506             fn[dirlen] = '\0';
507             if (0 != lstat(fn, &lst)) {
508                 fn[dirlen] = e;
509                 return NULL;
510             }
511             fn[dirlen] = e;
512         }
513         if (fam_dir->fam_parent) {
514             --fam_dir->fam_parent->refcnt;
515             fam_dir->fam_parent = NULL;
516         }
517         if (S_ISLNK(lst.st_mode)) {
518             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
519         }
520     }
521 
522     ++fam_dir->refcnt;
523     return fam_dir;
524 }
525 
526 #endif
527 
528 
529 static stat_cache_entry * stat_cache_entry_init(void) {
530     stat_cache_entry *sce = calloc(1, sizeof(*sce));
531     force_assert(NULL != sce);
532     return sce;
533 }
534 
535 static void stat_cache_entry_free(void *data) {
536     stat_cache_entry *sce = data;
537     if (!sce) return;
538 
539   #ifdef HAVE_FAM_H
540     /*(decrement refcnt only;
541      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
542     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
543   #endif
544 
545     free(sce->name.ptr);
546     free(sce->etag.ptr);
547     if (sce->content_type.size) free(sce->content_type.ptr);
548 
549     free(sce);
550 }
551 
552 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
553 
554 static const char *attrname = "Content-Type";
555 static char attrval[128];
556 static buffer attrb = { attrval, 0, 0 };
557 
558 static int stat_cache_attr_get(char *name) {
559   #if defined(HAVE_XATTR)
560     int attrlen = sizeof(attrval)-1;
561     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
562   #elif defined(HAVE_EXTATTR)
563     ssize_t attrlen;
564     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
565                                         attrval, sizeof(attrval)-1)))
566   #endif
567     {
568         attrval[attrlen] = '\0';
569         attrb.used = (uint32_t)(attrlen + 1);
570         return 1;
571     }
572     return 0;
573 }
574 
575 #endif
576 
577 int stat_cache_init(fdevents *ev, log_error_st *errh) {
578   #ifdef HAVE_FAM_H
579     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
580         sc.scf = stat_cache_init_fam(ev, errh);
581         if (NULL == sc.scf) return 0;
582     }
583   #else
584     UNUSED(ev);
585     UNUSED(errh);
586   #endif
587 
588     return 1;
589 }
590 
591 void stat_cache_free(void) {
592     splay_tree *sptree = sc.files;
593     while (sptree) {
594         stat_cache_entry_free(sptree->data);
595         sptree = splaytree_delete(sptree, sptree->key);
596     }
597     sc.files = NULL;
598 
599   #ifdef HAVE_FAM_H
600     stat_cache_free_fam(sc.scf);
601     sc.scf = NULL;
602   #endif
603 
604   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
605     attrname = "Content-Type";
606   #endif
607 
608     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
609 }
610 
611 void stat_cache_xattrname (const char *name) {
612   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
613     attrname = name;
614   #else
615     UNUSED(name);
616   #endif
617 }
618 
619 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
620     if (buffer_string_is_empty(stat_cache_string))
621         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
622     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
623         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
624 #ifdef HAVE_FAM_H
625     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
626         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
627 #endif
628     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable")))
629         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
630     else {
631         log_error(errh, __FILE__, __LINE__,
632           "server.stat-cache-engine can be one of \"disable\", \"simple\","
633 #ifdef HAVE_FAM_H
634           " \"fam\","
635 #endif
636           " but not: %s", stat_cache_string->ptr);
637         return -1;
638     }
639     return 0;
640 }
641 
642 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const size_t nlen)
643 {
644     const char * const end = name + nlen; /*(end of string)*/
645     const uint32_t used = mimetypes->used;
646     if (used < 16) {
647         for (uint32_t i = 0; i < used; ++i) {
648             /* suffix match */
649             const data_string *ds = (data_string *)mimetypes->data[i];
650             const size_t klen = buffer_string_length(&ds->key);
651             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
652                 return &ds->value;
653         }
654     }
655     else {
656         const char *s;
657         const data_string *ds;
658         if (nlen) {
659             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
660             if (*s == '/') ++s;
661         }
662         else {
663             s = name;
664         }
665         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
666         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
667         if (NULL != ds) return &ds->value;
668         while (++s < end) {
669             while (*s != '.' && ++s != end) ;
670             if (s == end) break;
671             /* search ".ext" then "ext" */
672             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
673             if (NULL != ds) return &ds->value;
674             /* repeat search without leading '.' to handle situation where
675              * admin configured mimetype.assign keys without leading '.' */
676             if (++s < end) {
677                 if (*s == '.') { --s; continue; }
678                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
679                 if (NULL != ds) return &ds->value;
680             }
681         }
682         /* search for ""; catchall */
683         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
684         if (NULL != ds) return &ds->value;
685     }
686 
687     return NULL;
688 }
689 
690 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
691 
692 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
693 {
694     return stat_cache_attr_get(name) ? &attrb : NULL;
695 }
696 
697 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
698 {
699     /*(invalid caching if user config has multiple, different
700      * r->conf.mimetypes for same extension (not expected))*/
701     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
702 
703     if (!S_ISREG(sce->st.st_mode)) return NULL;
704 
705     /* cache mimetype */
706     const buffer *mtype =
707       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
708     if (NULL == mtype)
709         mtype = stat_cache_mimetype_by_ext(mimetypes,CONST_BUF_LEN(&sce->name));
710     if (NULL != mtype) {
711         if (sce->content_type.size) {
712             buffer_copy_buffer(&sce->content_type, mtype);
713         }
714         else if (mtype == &attrb) {
715             sce->content_type.ptr = NULL;
716             buffer_copy_buffer(&sce->content_type, mtype);
717         }
718         else {
719             /*(copy pointers from mimetypes array; avoid allocation)*/
720             sce->content_type.ptr = mtype->ptr;
721             sce->content_type.used = mtype->used;
722             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
723         }
724     }
725     else
726         buffer_clear(&sce->content_type);
727 
728     return &sce->content_type;
729 }
730 
731 #else
732 
733 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
734 {
735     /*(invalid caching if user config has multiple, different
736      * r->conf.mimetypes for same extension (not expected))*/
737     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
738 
739     if (!S_ISREG(sce->st.st_mode)) return NULL;
740 
741     /* cache mimetype */
742     const buffer * const mtype =
743       stat_cache_mimetype_by_ext(mimetypes, CONST_BUF_LEN(&sce->name));
744     if (NULL != mtype) {
745         /*(copy pointers from mimetypes array; avoid allocation)*/
746         sce->content_type.ptr = mtype->ptr;
747         sce->content_type.used = mtype->used;
748         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
749     }
750     else
751         buffer_clear(&sce->content_type);
752 
753     return &sce->content_type;
754 }
755 
756 #endif
757 
758 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
759     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
760      * for same path (not expected, since etag flags should be by filesystem))*/
761     if (!buffer_string_is_empty(&sce->etag)) return &sce->etag;
762 
763     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
764         if (0 == flags) return NULL;
765         etag_create(&sce->etag, &sce->st, flags);
766         return &sce->etag;
767     }
768 
769     return NULL;
770 }
771 
772 void stat_cache_update_entry(const char *name, size_t len,
773                              struct stat *st, buffer *etagb)
774 {
775     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
776     force_assert(0 != len);
777     if (name[len-1] == '/') { if (0 == --len) len = 1; }
778     splay_tree **sptree = &sc.files;
779     stat_cache_entry *sce =
780       stat_cache_sptree_find(sptree, name, len);
781     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
782         sce->stat_ts = log_epoch_secs;
783         sce->st = *st; /* etagb might be NULL to clear etag (invalidate) */
784         buffer_copy_string_len(&sce->etag, CONST_BUF_LEN(etagb));
785       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
786         buffer_clear(&sce->content_type);
787       #endif
788     }
789 }
790 
791 void stat_cache_delete_entry(const char *name, size_t len)
792 {
793     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
794     force_assert(0 != len);
795     if (name[len-1] == '/') { if (0 == --len) len = 1; }
796     splay_tree **sptree = &sc.files;
797     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
798     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
799         stat_cache_entry_free(sce);
800         *sptree = splaytree_delete(*sptree, (*sptree)->key);
801     }
802 }
803 
804 void stat_cache_invalidate_entry(const char *name, size_t len)
805 {
806     splay_tree **sptree = &sc.files;
807     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
808     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
809         sce->stat_ts = 0;
810       #ifdef HAVE_FAM_H
811         if (sce->fam_dir != NULL) {
812             --((fam_dir_entry *)sce->fam_dir)->refcnt;
813             sce->fam_dir = NULL;
814         }
815       #endif
816     }
817 }
818 
819 #ifdef HAVE_FAM_H
820 
821 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
822                                                 const char *name, size_t len)
823 {
824     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
825     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
826 
827     buffer *b = &((stat_cache_entry *)t->data)->name;
828     size_t blen = buffer_string_length(b);
829     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
830         stat_cache_entry *sce = t->data;
831         sce->stat_ts = 0;
832         if (sce->fam_dir != NULL) {
833             --((fam_dir_entry *)sce->fam_dir)->refcnt;
834             sce->fam_dir = NULL;
835         }
836     }
837 }
838 
839 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
840 {
841     splay_tree * const sptree = sc.files;
842     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
843 }
844 
845 #endif
846 
847 /*
848  * walk though splay_tree and collect contents of dir tree.
849  * remove tagged entries in a second loop
850  */
851 
852 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
853                                     int *keys, int *ndx)
854 {
855     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
856     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
857     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
858     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
859 
860     buffer *b = &((stat_cache_entry *)t->data)->name;
861     size_t blen = buffer_string_length(b);
862     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
863         keys[(*ndx)++] = t->key;
864 }
865 
866 static void stat_cache_prune_dir_tree(const char *name, size_t len)
867 {
868     int max_ndx, i;
869     int keys[8192]; /* 32k size on stack */
870     splay_tree *sptree = sc.files;
871     do {
872         if (!sptree) return;
873         max_ndx = 0;
874         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
875         for (i = 0; i < max_ndx; ++i) {
876             const int ndx = keys[i];
877             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
878             if (node && node->key == ndx) {
879                 stat_cache_entry_free(node->data);
880                 sptree = splaytree_delete(sptree, ndx);
881             }
882         }
883     } while (max_ndx == sizeof(keys)/sizeof(int));
884     sc.files = sptree;
885 }
886 
887 static void stat_cache_delete_tree(const char *name, size_t len)
888 {
889     stat_cache_delete_entry(name, len);
890     stat_cache_prune_dir_tree(name, len);
891 }
892 
893 void stat_cache_delete_dir(const char *name, size_t len)
894 {
895     force_assert(0 != len);
896     if (name[len-1] == '/') { if (0 == --len) len = 1; }
897     stat_cache_delete_tree(name, len);
898   #ifdef HAVE_FAM_H
899     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
900         splay_tree **sptree = &sc.scf->dirs;
901         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
902         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
903             fam_dir_invalidate_node(fam_dir);
904         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
905         fam_dir_periodic_cleanup();
906     }
907   #endif
908 }
909 
910 /***
911  *
912  *
913  *
914  * returns:
915  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
916  *  - HANDLER_ERROR on stat() failed -> see errno for problem
917  */
918 
919 stat_cache_entry * stat_cache_get_entry(const buffer *name) {
920 	stat_cache_entry *sce = NULL;
921 	struct stat st;
922 	int file_ndx;
923 
924 	/* consistency: ensure lookup name does not end in '/' unless root "/"
925 	 * (but use full path given with stat(), even with trailing '/') */
926 	int final_slash = 0;
927 	size_t len = buffer_string_length(name);
928 	force_assert(0 != len);
929 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
930 	/* Note: paths are expected to be normalized before calling stat_cache,
931 	 * e.g. without repeated '/' */
932 
933 	if (name->ptr[0] != '/') return NULL;
934 
935 	/*
936 	 * check if the directory for this file has changed
937 	 */
938 
939 	const time_t cur_ts = log_epoch_secs;
940 
941 	file_ndx = hashme(name->ptr, len);
942 	splay_tree * const sptree = sc.files = splaytree_splay(sc.files, file_ndx);
943 
944 	if (sptree && (sptree->key == file_ndx)) {
945 		/* we have seen this file already and
946 		 * don't stat() it again in the same second */
947 
948 		sce = sptree->data;
949 
950 		/* check if the name is the same, we might have a collision */
951 
952 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
953 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
954 				if (sce->stat_ts == cur_ts) {
955 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
956 						errno = ENOTDIR;
957 						return NULL;
958 					}
959 					return sce;
960 				}
961 			}
962 		      #ifdef HAVE_FAM_H
963 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
964 				 && sce->fam_dir) { /* entry is in monitored dir */
965 				/* re-stat() periodically, even if monitoring for changes
966 				 * (due to limitations in stat_cache.c use of FAM)
967 				 * (gaps due to not continually monitoring an entire tree) */
968 				if (cur_ts - sce->stat_ts < 16) {
969 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
970 						errno = ENOTDIR;
971 						return NULL;
972 					}
973 					return sce;
974 				}
975 			}
976 		      #endif
977 		} else {
978 			/* collision, forget about the entry */
979 			sce = NULL;
980 		}
981 	}
982 
983 	if (-1 == stat(name->ptr, &st)) {
984 		return NULL;
985 	}
986 
987 	if (S_ISREG(st.st_mode)) {
988 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
989 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
990 			errno = ENOTDIR;
991 			return NULL;
992 		}
993 	}
994 
995 	if (NULL == sce) {
996 
997 		sce = stat_cache_entry_init();
998 		buffer_copy_string_len(&sce->name, name->ptr, len);
999 
1000 		/* already splayed file_ndx */
1001 		if (NULL != sptree && sptree->key == file_ndx) {
1002 			/* hash collision: replace old entry */
1003 			stat_cache_entry_free(sptree->data);
1004 			sptree->data = sce;
1005 		} else {
1006 			sc.files = splaytree_insert(sptree, file_ndx, sce);
1007 		}
1008 
1009 	} else {
1010 
1011 		buffer_clear(&sce->etag);
1012 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1013 		buffer_clear(&sce->content_type);
1014 	      #endif
1015 
1016 	}
1017 
1018 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1019 
1020 #ifdef HAVE_FAM_H
1021 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1022 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1023 		sce->fam_dir =
1024 		  fam_dir_monitor(sc.scf, CONST_BUF_LEN(name), &st);
1025 	      #if 0 /*(performed below)*/
1026 		if (NULL != sce->fam_dir) {
1027 			/*(may have been invalidated by dir change)*/
1028 			sce->stat_ts = cur_ts;
1029 		}
1030 	      #endif
1031 	}
1032 #endif
1033 
1034 	sce->stat_ts = cur_ts;
1035 	return sce;
1036 }
1037 
1038 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1039     /* caller should check for symlinks only if we should block symlinks. */
1040 
1041     /* catch the obvious symlinks
1042      *
1043      * this is not a secure check as we still have a race-condition between
1044      * the stat() and the open. We can only solve this by
1045      * 1. open() the file
1046      * 2. fstat() the fd
1047      *
1048      * and keeping the file open for the rest of the time. But this can
1049      * only be done at network level.
1050      * */
1051 
1052   #ifdef HAVE_LSTAT
1053     /* we assume "/" can not be symlink,
1054      * so skip the symlink stuff if path is "/" */
1055     size_t len = buffer_string_length(name);
1056     force_assert(0 != len);
1057     force_assert(name->ptr[0] == '/');
1058     if (1 == len) return 0;
1059    #ifndef PATH_MAX
1060    #define PATH_MAX 4096
1061    #endif
1062     if (len >= PATH_MAX) return -1;
1063 
1064     char buf[PATH_MAX];
1065     memcpy(buf, name->ptr, len);
1066     char *s_cur = buf+len;
1067     do {
1068         *s_cur = '\0';
1069         struct stat st;
1070         if (0 == lstat(buf, &st)) {
1071             if (S_ISLNK(st.st_mode)) return 1;
1072         }
1073         else {
1074             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1075             return -1;
1076         }
1077     } while ((s_cur = strrchr(buf, '/')) != buf);
1078   #endif
1079 
1080     return 0;
1081 }
1082 
1083 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1084 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1085 	 * not any intermediate symlinks along the path)*/
1086 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1087 	if (fd >= 0) {
1088 		if (0 == fstat(fd, st)) {
1089 			return fd;
1090 		} else {
1091 			close(fd);
1092 		}
1093 	}
1094 	return -1;
1095 }
1096 
1097 /**
1098  * remove stat() from cache which haven't been stat()ed for
1099  * more than 2 seconds
1100  *
1101  *
1102  * walk though the stat-cache, collect the ids which are too old
1103  * and remove them in a second loop
1104  */
1105 
1106 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, uint32_t * const ndx, const time_t max_age, const time_t cur_ts) {
1107     if (!t) return;
1108 
1109     stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1110     stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1111 
1112     const stat_cache_entry * const sce = t->data;
1113 
1114     if (cur_ts - sce->stat_ts > max_age) {
1115         keys[(*ndx)++] = t->key;
1116     }
1117 }
1118 
1119 static void stat_cache_periodic_cleanup(const time_t max_age, const time_t cur_ts) {
1120 	splay_tree *sptree = sc.files;
1121 	if (!sptree) return;
1122 
1123 	int * const keys = calloc(1, sizeof(int) * sptree->size);
1124 	force_assert(NULL != keys);
1125 
1126 	uint32_t max_ndx = 0;
1127 	stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1128 
1129 	for (uint32_t i = 0; i < max_ndx; ++i) {
1130 		int ndx = keys[i];
1131 		sptree = splaytree_splay(sptree, ndx);
1132 		if (sptree && sptree->key == ndx) {
1133 			stat_cache_entry_free(sptree->data);
1134 			sptree = splaytree_delete(sptree, ndx);
1135 		}
1136 	}
1137 
1138 	sc.files = sptree;
1139 
1140 	free(keys);
1141 }
1142 
1143 void stat_cache_trigger_cleanup(void) {
1144 	time_t max_age = 2;
1145 
1146       #ifdef HAVE_FAM_H
1147 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1148 		if (log_epoch_secs & 0x1F) return;
1149 		/* once every 32 seconds (0x1F == 31) */
1150 		max_age = 32;
1151 		fam_dir_periodic_cleanup();
1152 		/* By doing this before stat_cache_periodic_cleanup(),
1153 		 * entries used within the next max_age secs will remain
1154 		 * monitored, instead of effectively flushing and
1155 		 * rebuilding the FAM monitoring every max_age seconds */
1156 	}
1157       #endif
1158 
1159 	stat_cache_periodic_cleanup(max_age, log_epoch_secs);
1160 }
1161