xref: /lighttpd1.4/src/stat_cache.c (revision 0563407d)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "etag.h"
7 #include "splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
41 enum {
42   STAT_CACHE_ENGINE_SIMPLE, /*(default)*/
43   STAT_CACHE_ENGINE_NONE,
44   STAT_CACHE_ENGINE_FAM
45 };
46 
47 struct stat_cache_fam;  /* declaration */
48 
49 typedef struct stat_cache {
50 	int stat_cache_engine;
51 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
52 	struct stat_cache_fam *scf;
53 } stat_cache;
54 
55 static stat_cache sc;
56 
57 
58 /* the famous DJB hash function for strings */
59 __attribute_pure__
60 static uint32_t djbhash(const char *str, const size_t len)
61 {
62     const unsigned char * const s = (const unsigned char *)str;
63     uint32_t hash = 5381;
64     for (size_t i = 0; i < len; ++i) hash = ((hash << 5) + hash) ^ s[i];
65     return hash;
66 }
67 
68 
69 __attribute_pure__
70 static uint32_t hashme(const char *str, const size_t len)
71 {
72     /* strip highest bit of hash value for splaytree */
73     return djbhash(str,len) & ~(((uint32_t)1) << 31);
74 }
75 
76 
77 static void * stat_cache_sptree_find(splay_tree ** const sptree,
78                                      const char * const name,
79                                      size_t len)
80 {
81     const int ndx = hashme(name, len);
82     *sptree = splaytree_splay(*sptree, ndx);
83     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
84 }
85 
86 
87 #ifdef HAVE_FAM_H
88 
89 /* monitor changes in directories using FAM
90  *
91  * This implementation employing FAM monitors directories as they are used,
92  * and maintains a reference count for cache use within stat_cache.c.
93  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
94  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
95  * within the cache are checked against the filesystem upon use if last stat()
96  * was greater than or equal to 16 seconds ago.
97  *
98  * This implementation does not monitor every directory in a tree, and therefore
99  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
100  * processing events from FAM might also lead to stale cache entries.
101  *
102  * For many websites, a large number of files are seldom, if ever, modified,
103  * and a common practice with images is to create a new file with a new name
104  * when a new version is needed, in order for client browsers and CDNs to better
105  * cache the content.  Given this, most use will see little difference in
106  * performance between server.stat-cache-engine = "fam" and "simple" (default).
107  * The default server.stat-cache-engine = "simple" calls stat() on a target once
108  * per second, and reuses that information until the next second.  For use where
109  * changes must be immediately visible, server.stat-cache-engine = "disable"
110  * should be used.
111  *
112  * When considering use of server.stat-cache-engine = "fam", there are a few
113  * additional limitations for this cache implementation using FAM.
114  * - symlinks to files located outside of the current directory do not result
115  *   in changes to that file being monitored (unless that file is in a directory
116  *   which is monitored as a result of a different request).  symlinks can be
117  *   chained and can be circular.  This implementation *does not* readlink() or
118  *   realpath() to resolve the chains to find and monitor the ultimate target
119  *   directory.  While symlinks to files located outside the current directory
120  *   are not monitored, symlinks to directories *are* monitored, though chains
121  *   of symlinks to directories do not result in monitoring of the directories
122  *   containing intermediate symlinks to the target directory.
123  * - directory rename of a directory which is not currently being monitored will
124  *   result in stale information in the cache if there is a subdirectory that is
125  *   being monitored.
126  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
127  * does re-validate the information in the cache upon use if the cache entry has
128  * not been checked in 16 seconds, so that is the upper limit for use of stale
129  * data.
130  *
131  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
132  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
133  * to the overhead of processing the additional noise generated from changes.
134  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
135  * untrusted files where a malicious user could generate an excess of change
136  * events.
137  *
138  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
139  * event is received for a directory (or symlink to a directory) which has been
140  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
141  * changes of large directories trees where there have been a large number of
142  * different files recently accessed and part of the stat_cache.
143  */
144 
145 #include <fam.h>
146 
147 typedef struct fam_dir_entry {
148 	buffer *name;
149 	int refcnt;
150 	FAMRequest req;
151 	time_t stat_ts;
152 	dev_t st_dev;
153 	ino_t st_ino;
154 	struct fam_dir_entry *fam_parent;
155 } fam_dir_entry;
156 
157 typedef struct stat_cache_fam {
158 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
159 	FAMConnection fam;
160 	log_error_st *errh;
161 	fdevents *ev;
162 	fdnode *fdn;
163 	int fd;
164 } stat_cache_fam;
165 
166 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
167 {
168     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
169     force_assert(NULL != fam_dir);
170 
171     fam_dir->name = buffer_init();
172     buffer_copy_string_len(fam_dir->name, name, len);
173     fam_dir->refcnt = 0;
174 
175     return fam_dir;
176 }
177 
178 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
179 {
180     if (!fam_dir) return;
181     /*(fam_dir->parent might be invalid pointer here; ignore)*/
182     buffer_free(fam_dir->name);
183     free(fam_dir);
184 }
185 
186 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
187 {
188     fam_dir->stat_ts = 0;
189     if (fam_dir->fam_parent) {
190         --fam_dir->fam_parent->refcnt;
191         fam_dir->fam_parent = NULL;
192     }
193 }
194 
195 /*
196  * walk though splay_tree and collect contents of dir tree.
197  * remove tagged entries in a second loop
198  */
199 
200 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
201 {
202     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
203     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
204     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
205     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
206 
207     fam_dir_entry * const fam_dir = t->data;
208     if (0 == fam_dir->refcnt) {
209         fam_dir_invalidate_node(fam_dir);
210         keys[(*ndx)++] = t->key;
211     }
212 }
213 
214 static void fam_dir_periodic_cleanup() {
215     int max_ndx, i;
216     int keys[8192]; /* 32k size on stack */
217     stat_cache_fam * const scf = sc.scf;
218     do {
219         if (!scf->dirs) return;
220         max_ndx = 0;
221         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
222         for (i = 0; i < max_ndx; ++i) {
223             const int ndx = keys[i];
224             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
225             if (node && node->key == ndx) {
226                 fam_dir_entry *fam_dir = node->data;
227                 scf->dirs = splaytree_delete(scf->dirs, ndx);
228                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
229                 fam_dir_entry_free(fam_dir);
230             }
231         }
232     } while (max_ndx == sizeof(keys)/sizeof(int));
233 }
234 
235 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
236 {
237     /*force_assert(t);*/
238     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
239     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
240 
241     fam_dir_entry * const fam_dir = t->data;
242     buffer *b = fam_dir->name;
243     size_t blen = buffer_string_length(b);
244     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
245         fam_dir_invalidate_node(fam_dir);
246 }
247 
248 /* declarations */
249 static void stat_cache_delete_tree(const char *name, size_t len);
250 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
251 
252 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
253 {
254     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
255         FAMEvent fe;
256         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
257 
258         /* ignore events which may have been pending for
259          * paths recently cancelled via FAMCancelMonitor() */
260         ndx = (int)(intptr_t)fe.userdata;
261         scf->dirs = splaytree_splay(scf->dirs, ndx);
262         if (!scf->dirs || scf->dirs->key != ndx) {
263             continue;
264         }
265         fam_dir_entry *fam_dir = scf->dirs->data;
266         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
267             != FAMREQUEST_GETREQNUM(&fe.fr)) {
268             continue;
269         }
270 
271         if (fe.filename[0] != '/') {
272             buffer * const n = fam_dir->name;
273             fam_dir_entry *fam_link;
274             size_t len;
275             switch(fe.code) {
276             case FAMCreated:
277                 /* file created in monitored dir modifies dir and
278                  * we should get a separate FAMChanged event for dir.
279                  * Therefore, ignore file FAMCreated event here.
280                  * Also, if FAMNoExists() is used, might get spurious
281                  * FAMCreated events as changes are made e.g. in monitored
282                  * sub-sub-sub dirs and the library discovers new (already
283                  * existing) dir entries */
284                 continue;
285             case FAMChanged:
286                 /* file changed in monitored dir does not modify dir */
287             case FAMDeleted:
288             case FAMMoved:
289                 /* file deleted or moved in monitored dir modifies dir,
290                  * but FAM provides separate notification for that */
291 
292                 /* temporarily append filename to dir in fam_dir->name to
293                  * construct path, then delete stat_cache entry (if any)*/
294                 len = buffer_string_length(n);
295                 buffer_append_string_len(n, CONST_STR_LEN("/"));
296                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
297                 /* (alternatively, could chose to stat() and update)*/
298                 stat_cache_invalidate_entry(CONST_BUF_LEN(n));
299 
300                 fam_link = /*(check if might be symlink to monitored dir)*/
301                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
302                 if (fam_link && !buffer_is_equal(fam_link->name, n))
303                     fam_link = NULL;
304 
305                 buffer_string_set_length(n, len);
306 
307                 if (fam_link) {
308                     /* replaced symlink changes containing dir */
309                     stat_cache_invalidate_entry(CONST_BUF_LEN(n));
310                     /* handle symlink to dir as deleted dir below */
311                     fe.code = FAMDeleted;
312                     fam_dir = fam_link;
313                     break;
314                 }
315                 continue;
316             default:
317                 continue;
318             }
319         }
320 
321         switch(fe.code) {
322         case FAMChanged:
323             stat_cache_invalidate_entry(CONST_BUF_LEN(fam_dir->name));
324             break;
325         case FAMDeleted:
326         case FAMMoved:
327             stat_cache_delete_tree(CONST_BUF_LEN(fam_dir->name));
328             fam_dir_invalidate_node(fam_dir);
329             if (scf->dirs)
330                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
331             fam_dir_periodic_cleanup();
332             break;
333         default:
334             break;
335         }
336     }
337 }
338 
339 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
340 {
341 	stat_cache_fam * const scf = ctx; /* sc.scf */
342 
343 	if (revent & FDEVENT_IN) {
344 		stat_cache_handle_fdevent_in(scf);
345 	}
346 
347 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
348 		/* fam closed the connection */
349 		log_error(scf->errh, __FILE__, __LINE__,
350 		  "FAM connection closed; disabling stat_cache.");
351 		/* (although effectively STAT_CACHE_ENGINE_NONE,
352 		 *  do not change here so that periodic jobs clean up memory)*/
353 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
354 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
355 		fdevent_unregister(scf->ev, scf->fd);
356 		scf->fdn = NULL;
357 
358 		FAMClose(&scf->fam);
359 		scf->fd = -1;
360 	}
361 
362 	return HANDLER_GO_ON;
363 }
364 
365 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
366 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
367 	force_assert(scf);
368 	scf->fd = -1;
369 	scf->ev = ev;
370 	scf->errh = errh;
371 
372 	/* setup FAM */
373 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
374 		log_error(errh, __FILE__, __LINE__,
375 		  "could not open a fam connection, dying.");
376 		return NULL;
377 	}
378       #ifdef HAVE_FAMNOEXISTS
379 	FAMNoExists(&scf->fam);
380       #endif
381 
382 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
383 	fdevent_setfd_cloexec(scf->fd);
384 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
385 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
386 
387 	return scf;
388 }
389 
390 static void stat_cache_free_fam(stat_cache_fam *scf) {
391 	if (NULL == scf) return;
392 
393 	while (scf->dirs) {
394 		/*(skip entry invalidation and FAMCancelMonitor())*/
395 		splay_tree *node = scf->dirs;
396 		fam_dir_entry_free((fam_dir_entry *)node->data);
397 		scf->dirs = splaytree_delete(scf->dirs, node->key);
398 	}
399 
400 	if (-1 != scf->fd) {
401 		/*scf->fdn already cleaned up in fdevent_free()*/
402 		FAMClose(&scf->fam);
403 		/*scf->fd = -1;*/
404 	}
405 
406 	free(scf);
407 }
408 
409 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, size_t dirlen, struct stat *st)
410 {
411     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
412     const int fn_is_dir = S_ISDIR(st->st_mode);
413     /*force_assert(0 != dirlen);*/
414     /*force_assert(fn[0] == '/');*/
415     /* consistency: ensure fn does not end in '/' unless root "/"
416      * FAM events will not end in '/', so easier to match this way */
417     if (fn[dirlen-1] == '/') --dirlen;
418     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
419     /* Note: paths are expected to be normalized before calling stat_cache,
420      * e.g. without repeated '/' */
421     if (!fn_is_dir) {
422         while (fn[--dirlen] != '/') ;
423         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
424     }
425     int dir_ndx = hashme(fn, dirlen);
426     fam_dir_entry *fam_dir = NULL;
427 
428     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
429     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
430         fam_dir = scf->dirs->data;
431         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
432             /* hash collision; preserve existing
433              * do not monitor new to avoid cache thrashing */
434             return NULL;
435         }
436         /* directory already registered */
437     }
438 
439     const time_t cur_ts = log_epoch_secs;
440     struct stat lst;
441     int ck_dir = fn_is_dir;
442     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
443         ck_dir = 1;
444         /*(temporarily modify fn)*/
445         fn[dirlen] = '\0';
446         if (0 != lstat(fn, &lst)) {
447             fn[dirlen] = '/';
448             return NULL;
449         }
450         if (!S_ISLNK(lst.st_mode)) {
451             st = &lst;
452         }
453         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
454             fn[dirlen] = '/';
455             return NULL;
456         }
457         fn[dirlen] = '/';
458     }
459 
460     int ck_lnk = (NULL == fam_dir);
461     if (ck_dir && NULL != fam_dir) {
462         /* check stat() matches device and inode, just in case an external event
463          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
464         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
465             ck_lnk = 1;
466             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
467              * fam_dir is not NULL and so splaytree_insert not called below)*/
468             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
469             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
470                 stat_cache_update_entry(fn, dirlen, st, NULL);
471             /*(must not delete tree since caller is holding a valid node)*/
472             stat_cache_invalidate_dir_tree(fn, dirlen);
473             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
474                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
475                                             &fam_dir->req,
476                                             (void *)(intptr_t)dir_ndx)) {
477                 fam_dir->stat_ts = 0; /* invalidate */
478                 return NULL;
479             }
480             fam_dir->st_dev = st->st_dev;
481             fam_dir->st_ino = st->st_ino;
482         }
483         fam_dir->stat_ts = cur_ts;
484     }
485 
486     if (NULL == fam_dir) {
487         fam_dir = fam_dir_entry_init(fn, dirlen);
488 
489         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
490                                      (void *)(intptr_t)dir_ndx)) {
491             log_error(scf->errh, __FILE__, __LINE__,
492               "monitoring dir failed: %s file: %s %s",
493               fam_dir->name->ptr, fn, FamErrlist[FAMErrno]);
494             fam_dir_entry_free(fam_dir);
495             return NULL;
496         }
497 
498         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
499         fam_dir->stat_ts= cur_ts;
500         fam_dir->st_dev = st->st_dev;
501         fam_dir->st_ino = st->st_ino;
502     }
503 
504     if (ck_lnk) {
505         if (fn_is_dir) {
506             /*(temporarily modify fn)*/
507             char e = fn[dirlen];
508             fn[dirlen] = '\0';
509             if (0 != lstat(fn, &lst)) {
510                 fn[dirlen] = e;
511                 return NULL;
512             }
513             fn[dirlen] = e;
514         }
515         if (fam_dir->fam_parent) {
516             --fam_dir->fam_parent->refcnt;
517             fam_dir->fam_parent = NULL;
518         }
519         if (S_ISLNK(lst.st_mode)) {
520             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
521         }
522     }
523 
524     ++fam_dir->refcnt;
525     return fam_dir;
526 }
527 
528 #endif
529 
530 
531 static stat_cache_entry * stat_cache_entry_init(void) {
532     stat_cache_entry *sce = calloc(1, sizeof(*sce));
533     force_assert(NULL != sce);
534     return sce;
535 }
536 
537 static void stat_cache_entry_free(void *data) {
538     stat_cache_entry *sce = data;
539     if (!sce) return;
540 
541   #ifdef HAVE_FAM_H
542     /*(decrement refcnt only;
543      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
544     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
545   #endif
546 
547     free(sce->name.ptr);
548     free(sce->etag.ptr);
549     if (sce->content_type.size) free(sce->content_type.ptr);
550 
551     free(sce);
552 }
553 
554 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
555 
556 static const char *attrname = "Content-Type";
557 static char attrval[128];
558 static buffer attrb = { attrval, 0, 0 };
559 
560 static int stat_cache_attr_get(const char *name) {
561   #if defined(HAVE_XATTR)
562    #if defined(HAVE_SYS_XATTR_H)
563     ssize_t attrlen;
564     if (0 < (attrlen = getxattr(name, attrname,
565                                 attrval, sizeof(attrval)-1)))
566    #else
567     int attrlen = sizeof(attrval)-1;
568     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
569    #endif
570   #elif defined(HAVE_EXTATTR)
571     ssize_t attrlen;
572     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
573                                         attrval, sizeof(attrval)-1)))
574   #endif
575     {
576         attrval[attrlen] = '\0';
577         attrb.used = (uint32_t)(attrlen + 1);
578         return 1;
579     }
580     return 0;
581 }
582 
583 #endif
584 
585 int stat_cache_init(fdevents *ev, log_error_st *errh) {
586   #ifdef HAVE_FAM_H
587     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
588         sc.scf = stat_cache_init_fam(ev, errh);
589         if (NULL == sc.scf) return 0;
590     }
591   #else
592     UNUSED(ev);
593     UNUSED(errh);
594   #endif
595 
596     return 1;
597 }
598 
599 void stat_cache_free(void) {
600     splay_tree *sptree = sc.files;
601     while (sptree) {
602         stat_cache_entry_free(sptree->data);
603         sptree = splaytree_delete(sptree, sptree->key);
604     }
605     sc.files = NULL;
606 
607   #ifdef HAVE_FAM_H
608     stat_cache_free_fam(sc.scf);
609     sc.scf = NULL;
610   #endif
611 
612   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
613     attrname = "Content-Type";
614   #endif
615 
616     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
617 }
618 
619 void stat_cache_xattrname (const char *name) {
620   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
621     attrname = name;
622   #else
623     UNUSED(name);
624   #endif
625 }
626 
627 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
628     if (buffer_string_is_empty(stat_cache_string))
629         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
630     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
631         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
632 #ifdef HAVE_FAM_H
633     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
634         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
635 #endif
636     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable")))
637         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
638     else {
639         log_error(errh, __FILE__, __LINE__,
640           "server.stat-cache-engine can be one of \"disable\", \"simple\","
641 #ifdef HAVE_FAM_H
642           " \"fam\","
643 #endif
644           " but not: %s", stat_cache_string->ptr);
645         return -1;
646     }
647     return 0;
648 }
649 
650 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const size_t nlen)
651 {
652     const char * const end = name + nlen; /*(end of string)*/
653     const uint32_t used = mimetypes->used;
654     if (used < 16) {
655         for (uint32_t i = 0; i < used; ++i) {
656             /* suffix match */
657             const data_string *ds = (data_string *)mimetypes->data[i];
658             const size_t klen = buffer_string_length(&ds->key);
659             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
660                 return &ds->value;
661         }
662     }
663     else {
664         const char *s;
665         const data_string *ds;
666         if (nlen) {
667             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
668             if (*s == '/') ++s;
669         }
670         else {
671             s = name;
672         }
673         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
674         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
675         if (NULL != ds) return &ds->value;
676         while (++s < end) {
677             while (*s != '.' && ++s != end) ;
678             if (s == end) break;
679             /* search ".ext" then "ext" */
680             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
681             if (NULL != ds) return &ds->value;
682             /* repeat search without leading '.' to handle situation where
683              * admin configured mimetype.assign keys without leading '.' */
684             if (++s < end) {
685                 if (*s == '.') { --s; continue; }
686                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
687                 if (NULL != ds) return &ds->value;
688             }
689         }
690         /* search for ""; catchall */
691         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
692         if (NULL != ds) return &ds->value;
693     }
694 
695     return NULL;
696 }
697 
698 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
699 
700 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
701 {
702     return stat_cache_attr_get(name) ? &attrb : NULL;
703 }
704 
705 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
706 {
707     /*(invalid caching if user config has multiple, different
708      * r->conf.mimetypes for same extension (not expected))*/
709     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
710 
711     if (!S_ISREG(sce->st.st_mode)) return NULL;
712 
713     /* cache mimetype */
714     const buffer *mtype =
715       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
716     if (NULL == mtype)
717         mtype = stat_cache_mimetype_by_ext(mimetypes,CONST_BUF_LEN(&sce->name));
718     if (NULL != mtype) {
719         if (sce->content_type.size) {
720             buffer_copy_buffer(&sce->content_type, mtype);
721         }
722         else if (mtype == &attrb) {
723             sce->content_type.ptr = NULL;
724             buffer_copy_buffer(&sce->content_type, mtype);
725         }
726         else {
727             /*(copy pointers from mimetypes array; avoid allocation)*/
728             sce->content_type.ptr = mtype->ptr;
729             sce->content_type.used = mtype->used;
730             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
731         }
732     }
733     else
734         buffer_clear(&sce->content_type);
735 
736     return &sce->content_type;
737 }
738 
739 #else
740 
741 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
742 {
743     /*(invalid caching if user config has multiple, different
744      * r->conf.mimetypes for same extension (not expected))*/
745     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
746 
747     if (!S_ISREG(sce->st.st_mode)) return NULL;
748 
749     /* cache mimetype */
750     const buffer * const mtype =
751       stat_cache_mimetype_by_ext(mimetypes, CONST_BUF_LEN(&sce->name));
752     if (NULL != mtype) {
753         /*(copy pointers from mimetypes array; avoid allocation)*/
754         sce->content_type.ptr = mtype->ptr;
755         sce->content_type.used = mtype->used;
756         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
757     }
758     else
759         buffer_clear(&sce->content_type);
760 
761     return &sce->content_type;
762 }
763 
764 #endif
765 
766 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
767     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
768      * for same path (not expected, since etag flags should be by filesystem))*/
769     if (!buffer_string_is_empty(&sce->etag)) return &sce->etag;
770 
771     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
772         if (0 == flags) return NULL;
773         etag_create(&sce->etag, &sce->st, flags);
774         return &sce->etag;
775     }
776 
777     return NULL;
778 }
779 
780 void stat_cache_update_entry(const char *name, size_t len,
781                              struct stat *st, buffer *etagb)
782 {
783     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
784     force_assert(0 != len);
785     if (name[len-1] == '/') { if (0 == --len) len = 1; }
786     splay_tree **sptree = &sc.files;
787     stat_cache_entry *sce =
788       stat_cache_sptree_find(sptree, name, len);
789     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
790         sce->stat_ts = log_epoch_secs;
791         sce->st = *st; /* etagb might be NULL to clear etag (invalidate) */
792         buffer_copy_string_len(&sce->etag, CONST_BUF_LEN(etagb));
793       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
794         buffer_clear(&sce->content_type);
795       #endif
796     }
797 }
798 
799 void stat_cache_delete_entry(const char *name, size_t len)
800 {
801     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
802     force_assert(0 != len);
803     if (name[len-1] == '/') { if (0 == --len) len = 1; }
804     splay_tree **sptree = &sc.files;
805     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
806     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
807         stat_cache_entry_free(sce);
808         *sptree = splaytree_delete(*sptree, (*sptree)->key);
809     }
810 }
811 
812 void stat_cache_invalidate_entry(const char *name, size_t len)
813 {
814     splay_tree **sptree = &sc.files;
815     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
816     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
817         sce->stat_ts = 0;
818       #ifdef HAVE_FAM_H
819         if (sce->fam_dir != NULL) {
820             --((fam_dir_entry *)sce->fam_dir)->refcnt;
821             sce->fam_dir = NULL;
822         }
823       #endif
824     }
825 }
826 
827 #ifdef HAVE_FAM_H
828 
829 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
830                                                 const char *name, size_t len)
831 {
832     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
833     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
834 
835     buffer *b = &((stat_cache_entry *)t->data)->name;
836     size_t blen = buffer_string_length(b);
837     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
838         stat_cache_entry *sce = t->data;
839         sce->stat_ts = 0;
840         if (sce->fam_dir != NULL) {
841             --((fam_dir_entry *)sce->fam_dir)->refcnt;
842             sce->fam_dir = NULL;
843         }
844     }
845 }
846 
847 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
848 {
849     splay_tree * const sptree = sc.files;
850     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
851 }
852 
853 #endif
854 
855 /*
856  * walk though splay_tree and collect contents of dir tree.
857  * remove tagged entries in a second loop
858  */
859 
860 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
861                                     int *keys, int *ndx)
862 {
863     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
864     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
865     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
866     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
867 
868     buffer *b = &((stat_cache_entry *)t->data)->name;
869     size_t blen = buffer_string_length(b);
870     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
871         keys[(*ndx)++] = t->key;
872 }
873 
874 static void stat_cache_prune_dir_tree(const char *name, size_t len)
875 {
876     int max_ndx, i;
877     int keys[8192]; /* 32k size on stack */
878     splay_tree *sptree = sc.files;
879     do {
880         if (!sptree) return;
881         max_ndx = 0;
882         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
883         for (i = 0; i < max_ndx; ++i) {
884             const int ndx = keys[i];
885             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
886             if (node && node->key == ndx) {
887                 stat_cache_entry_free(node->data);
888                 sptree = splaytree_delete(sptree, ndx);
889             }
890         }
891     } while (max_ndx == sizeof(keys)/sizeof(int));
892     sc.files = sptree;
893 }
894 
895 static void stat_cache_delete_tree(const char *name, size_t len)
896 {
897     stat_cache_delete_entry(name, len);
898     stat_cache_prune_dir_tree(name, len);
899 }
900 
901 void stat_cache_delete_dir(const char *name, size_t len)
902 {
903     force_assert(0 != len);
904     if (name[len-1] == '/') { if (0 == --len) len = 1; }
905     stat_cache_delete_tree(name, len);
906   #ifdef HAVE_FAM_H
907     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
908         splay_tree **sptree = &sc.scf->dirs;
909         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
910         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
911             fam_dir_invalidate_node(fam_dir);
912         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
913         fam_dir_periodic_cleanup();
914     }
915   #endif
916 }
917 
918 /***
919  *
920  *
921  *
922  * returns:
923  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
924  *  - HANDLER_ERROR on stat() failed -> see errno for problem
925  */
926 
927 stat_cache_entry * stat_cache_get_entry(const buffer *name) {
928 	stat_cache_entry *sce = NULL;
929 	struct stat st;
930 	int file_ndx;
931 
932 	/* consistency: ensure lookup name does not end in '/' unless root "/"
933 	 * (but use full path given with stat(), even with trailing '/') */
934 	int final_slash = 0;
935 	size_t len = buffer_string_length(name);
936 	force_assert(0 != len);
937 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
938 	/* Note: paths are expected to be normalized before calling stat_cache,
939 	 * e.g. without repeated '/' */
940 
941 	if (name->ptr[0] != '/') {
942 		errno = EINVAL;
943 		return NULL;
944 	}
945 
946 	/*
947 	 * check if the directory for this file has changed
948 	 */
949 
950 	const time_t cur_ts = log_epoch_secs;
951 
952 	file_ndx = hashme(name->ptr, len);
953 	splay_tree * const sptree = sc.files = splaytree_splay(sc.files, file_ndx);
954 
955 	if (sptree && (sptree->key == file_ndx)) {
956 		/* we have seen this file already and
957 		 * don't stat() it again in the same second */
958 
959 		sce = sptree->data;
960 
961 		/* check if the name is the same, we might have a collision */
962 
963 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
964 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
965 				if (sce->stat_ts == cur_ts) {
966 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
967 						errno = ENOTDIR;
968 						return NULL;
969 					}
970 					return sce;
971 				}
972 			}
973 		      #ifdef HAVE_FAM_H
974 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
975 				 && sce->fam_dir) { /* entry is in monitored dir */
976 				/* re-stat() periodically, even if monitoring for changes
977 				 * (due to limitations in stat_cache.c use of FAM)
978 				 * (gaps due to not continually monitoring an entire tree) */
979 				if (cur_ts - sce->stat_ts < 16) {
980 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
981 						errno = ENOTDIR;
982 						return NULL;
983 					}
984 					return sce;
985 				}
986 			}
987 		      #endif
988 		} else {
989 			/* collision, forget about the entry */
990 			sce = NULL;
991 		}
992 	}
993 
994 	if (-1 == stat(name->ptr, &st)) {
995 		return NULL;
996 	}
997 
998 	if (S_ISREG(st.st_mode)) {
999 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1000 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
1001 			errno = ENOTDIR;
1002 			return NULL;
1003 		}
1004 	}
1005 
1006 	if (NULL == sce) {
1007 
1008 		sce = stat_cache_entry_init();
1009 		buffer_copy_string_len(&sce->name, name->ptr, len);
1010 
1011 		/* already splayed file_ndx */
1012 		if (NULL != sptree && sptree->key == file_ndx) {
1013 			/* hash collision: replace old entry */
1014 			stat_cache_entry_free(sptree->data);
1015 			sptree->data = sce;
1016 		} else {
1017 			sc.files = splaytree_insert(sptree, file_ndx, sce);
1018 		}
1019 
1020 	} else {
1021 
1022 		buffer_clear(&sce->etag);
1023 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1024 		buffer_clear(&sce->content_type);
1025 	      #endif
1026 
1027 	}
1028 
1029 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1030 
1031 #ifdef HAVE_FAM_H
1032 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1033 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1034 		sce->fam_dir =
1035 		  fam_dir_monitor(sc.scf, CONST_BUF_LEN(name), &st);
1036 	      #if 0 /*(performed below)*/
1037 		if (NULL != sce->fam_dir) {
1038 			/*(may have been invalidated by dir change)*/
1039 			sce->stat_ts = cur_ts;
1040 		}
1041 	      #endif
1042 	}
1043 #endif
1044 
1045 	sce->stat_ts = cur_ts;
1046 	return sce;
1047 }
1048 
1049 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1050     /* caller should check for symlinks only if we should block symlinks. */
1051 
1052     /* catch the obvious symlinks
1053      *
1054      * this is not a secure check as we still have a race-condition between
1055      * the stat() and the open. We can only solve this by
1056      * 1. open() the file
1057      * 2. fstat() the fd
1058      *
1059      * and keeping the file open for the rest of the time. But this can
1060      * only be done at network level.
1061      * */
1062 
1063   #ifdef HAVE_LSTAT
1064     /* we assume "/" can not be symlink,
1065      * so skip the symlink stuff if path is "/" */
1066     size_t len = buffer_string_length(name);
1067     force_assert(0 != len);
1068     force_assert(name->ptr[0] == '/');
1069     if (1 == len) return 0;
1070    #ifndef PATH_MAX
1071    #define PATH_MAX 4096
1072    #endif
1073     if (len >= PATH_MAX) return -1;
1074 
1075     char buf[PATH_MAX];
1076     memcpy(buf, name->ptr, len);
1077     char *s_cur = buf+len;
1078     do {
1079         *s_cur = '\0';
1080         struct stat st;
1081         if (0 == lstat(buf, &st)) {
1082             if (S_ISLNK(st.st_mode)) return 1;
1083         }
1084         else {
1085             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1086             return -1;
1087         }
1088     } while ((s_cur = strrchr(buf, '/')) != buf);
1089   #endif
1090 
1091     return 0;
1092 }
1093 
1094 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1095 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1096 	 * not any intermediate symlinks along the path)*/
1097 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1098 	if (fd >= 0) {
1099 		if (0 == fstat(fd, st)) {
1100 			return fd;
1101 		} else {
1102 			close(fd);
1103 		}
1104 	}
1105 	return -1;
1106 }
1107 
1108 /**
1109  * remove stat() from cache which haven't been stat()ed for
1110  * more than 2 seconds
1111  *
1112  *
1113  * walk though the stat-cache, collect the ids which are too old
1114  * and remove them in a second loop
1115  */
1116 
1117 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, uint32_t * const ndx, const time_t max_age, const time_t cur_ts) {
1118     if (!t) return;
1119 
1120     stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1121     stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1122 
1123     const stat_cache_entry * const sce = t->data;
1124 
1125     if (cur_ts - sce->stat_ts > max_age) {
1126         keys[(*ndx)++] = t->key;
1127     }
1128 }
1129 
1130 static void stat_cache_periodic_cleanup(const time_t max_age, const time_t cur_ts) {
1131 	splay_tree *sptree = sc.files;
1132 	if (!sptree) return;
1133 
1134 	int * const keys = calloc(1, sizeof(int) * sptree->size);
1135 	force_assert(NULL != keys);
1136 
1137 	uint32_t max_ndx = 0;
1138 	stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1139 
1140 	for (uint32_t i = 0; i < max_ndx; ++i) {
1141 		int ndx = keys[i];
1142 		sptree = splaytree_splay(sptree, ndx);
1143 		if (sptree && sptree->key == ndx) {
1144 			stat_cache_entry_free(sptree->data);
1145 			sptree = splaytree_delete(sptree, ndx);
1146 		}
1147 	}
1148 
1149 	sc.files = sptree;
1150 
1151 	free(keys);
1152 }
1153 
1154 void stat_cache_trigger_cleanup(void) {
1155 	time_t max_age = 2;
1156 
1157       #ifdef HAVE_FAM_H
1158 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1159 		if (log_epoch_secs & 0x1F) return;
1160 		/* once every 32 seconds (0x1F == 31) */
1161 		max_age = 32;
1162 		fam_dir_periodic_cleanup();
1163 		/* By doing this before stat_cache_periodic_cleanup(),
1164 		 * entries used within the next max_age secs will remain
1165 		 * monitored, instead of effectively flushing and
1166 		 * rebuilding the FAM monitoring every max_age seconds */
1167 	}
1168       #endif
1169 
1170 	stat_cache_periodic_cleanup(max_age, log_epoch_secs);
1171 }
1172