xref: /lighttpd1.4/src/stat_cache.c (revision 96abd9cf)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "etag.h"
7 #include "algo_splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
41 enum {
42   STAT_CACHE_ENGINE_SIMPLE, /*(default)*/
43   STAT_CACHE_ENGINE_NONE,
44   STAT_CACHE_ENGINE_FAM
45 };
46 
47 struct stat_cache_fam;  /* declaration */
48 
49 typedef struct stat_cache {
50 	int stat_cache_engine;
51 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
52 	struct stat_cache_fam *scf;
53 } stat_cache;
54 
55 static stat_cache sc;
56 
57 
58 static void * stat_cache_sptree_find(splay_tree ** const sptree,
59                                      const char * const name,
60                                      uint32_t len)
61 {
62     const int ndx = splaytree_djbhash(name, len);
63     *sptree = splaytree_splay(*sptree, ndx);
64     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
65 }
66 
67 
68 #ifdef HAVE_FAM_H
69 
70 /* monitor changes in directories using FAM
71  *
72  * This implementation employing FAM monitors directories as they are used,
73  * and maintains a reference count for cache use within stat_cache.c.
74  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
75  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
76  * within the cache are checked against the filesystem upon use if last stat()
77  * was greater than or equal to 16 seconds ago.
78  *
79  * This implementation does not monitor every directory in a tree, and therefore
80  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
81  * processing events from FAM might also lead to stale cache entries.
82  *
83  * For many websites, a large number of files are seldom, if ever, modified,
84  * and a common practice with images is to create a new file with a new name
85  * when a new version is needed, in order for client browsers and CDNs to better
86  * cache the content.  Given this, most use will see little difference in
87  * performance between server.stat-cache-engine = "fam" and "simple" (default).
88  * The default server.stat-cache-engine = "simple" calls stat() on a target once
89  * per second, and reuses that information until the next second.  For use where
90  * changes must be immediately visible, server.stat-cache-engine = "disable"
91  * should be used.
92  *
93  * When considering use of server.stat-cache-engine = "fam", there are a few
94  * additional limitations for this cache implementation using FAM.
95  * - symlinks to files located outside of the current directory do not result
96  *   in changes to that file being monitored (unless that file is in a directory
97  *   which is monitored as a result of a different request).  symlinks can be
98  *   chained and can be circular.  This implementation *does not* readlink() or
99  *   realpath() to resolve the chains to find and monitor the ultimate target
100  *   directory.  While symlinks to files located outside the current directory
101  *   are not monitored, symlinks to directories *are* monitored, though chains
102  *   of symlinks to directories do not result in monitoring of the directories
103  *   containing intermediate symlinks to the target directory.
104  * - directory rename of a directory which is not currently being monitored will
105  *   result in stale information in the cache if there is a subdirectory that is
106  *   being monitored.
107  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
108  * does re-validate the information in the cache upon use if the cache entry has
109  * not been checked in 16 seconds, so that is the upper limit for use of stale
110  * data.
111  *
112  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
113  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
114  * to the overhead of processing the additional noise generated from changes.
115  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
116  * untrusted files where a malicious user could generate an excess of change
117  * events.
118  *
119  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
120  * event is received for a directory (or symlink to a directory) which has been
121  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
122  * changes of large directories trees where there have been a large number of
123  * different files recently accessed and part of the stat_cache.
124  */
125 
126 #include <fam.h>
127 
128 #ifdef HAVE_FAMNOEXISTS
129 #ifndef LIGHTTPD_STATIC
130 #include <dlfcn.h>
131 #endif
132 #endif
133 
134 typedef struct fam_dir_entry {
135 	buffer *name;
136 	int refcnt;
137 	FAMRequest req;
138 	time_t stat_ts;
139 	dev_t st_dev;
140 	ino_t st_ino;
141 	struct fam_dir_entry *fam_parent;
142 } fam_dir_entry;
143 
144 typedef struct stat_cache_fam {
145 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
146 	FAMConnection fam;
147 	log_error_st *errh;
148 	fdevents *ev;
149 	fdnode *fdn;
150 	int fd;
151 } stat_cache_fam;
152 
153 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
154 {
155     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
156     force_assert(NULL != fam_dir);
157 
158     fam_dir->name = buffer_init();
159     buffer_copy_string_len(fam_dir->name, name, len);
160     fam_dir->refcnt = 0;
161 
162     return fam_dir;
163 }
164 
165 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
166 {
167     if (!fam_dir) return;
168     /*(fam_dir->parent might be invalid pointer here; ignore)*/
169     buffer_free(fam_dir->name);
170     free(fam_dir);
171 }
172 
173 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
174 {
175     fam_dir->stat_ts = 0;
176     if (fam_dir->fam_parent) {
177         --fam_dir->fam_parent->refcnt;
178         fam_dir->fam_parent = NULL;
179     }
180 }
181 
182 /*
183  * walk though splay_tree and collect contents of dir tree.
184  * remove tagged entries in a second loop
185  */
186 
187 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
188 {
189     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
190     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
191     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
192     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
193 
194     fam_dir_entry * const fam_dir = t->data;
195     if (0 == fam_dir->refcnt) {
196         fam_dir_invalidate_node(fam_dir);
197         keys[(*ndx)++] = t->key;
198     }
199 }
200 
201 __attribute_noinline__
202 static void fam_dir_periodic_cleanup() {
203     stat_cache_fam * const scf = sc.scf;
204     int max_ndx, i;
205     int keys[8192]; /* 32k size on stack */
206     do {
207         if (!scf->dirs) break;
208         max_ndx = 0;
209         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
210         for (i = 0; i < max_ndx; ++i) {
211             const int ndx = keys[i];
212             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
213             if (node && node->key == ndx) {
214                 fam_dir_entry *fam_dir = node->data;
215                 scf->dirs = splaytree_delete(scf->dirs, ndx);
216                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
217                 fam_dir_entry_free(fam_dir);
218             }
219         }
220     } while (max_ndx == sizeof(keys)/sizeof(int));
221 }
222 
223 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
224 {
225   #ifdef __clang_analyzer__
226     force_assert(name);
227   #endif
228     /*force_assert(t);*/
229     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
230     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
231 
232     fam_dir_entry * const fam_dir = t->data;
233   #ifdef __clang_analyzer__
234     force_assert(fam_dir);
235   #endif
236     buffer *b = fam_dir->name;
237     size_t blen = buffer_string_length(b);
238     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
239         fam_dir_invalidate_node(fam_dir);
240 }
241 
242 /* declarations */
243 static void stat_cache_delete_tree(const char *name, uint32_t len);
244 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
245 
246 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
247 {
248     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
249         FAMEvent fe;
250         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
251 
252         /* ignore events which may have been pending for
253          * paths recently cancelled via FAMCancelMonitor() */
254         ndx = (int)(intptr_t)fe.userdata;
255         scf->dirs = splaytree_splay(scf->dirs, ndx);
256         if (!scf->dirs || scf->dirs->key != ndx) {
257             continue;
258         }
259         fam_dir_entry *fam_dir = scf->dirs->data;
260         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
261             != FAMREQUEST_GETREQNUM(&fe.fr)) {
262             continue;
263         }
264 
265         if (fe.filename[0] != '/') {
266             buffer * const n = fam_dir->name;
267             fam_dir_entry *fam_link;
268             size_t len;
269             switch(fe.code) {
270             case FAMCreated:
271                 /* file created in monitored dir modifies dir and
272                  * we should get a separate FAMChanged event for dir.
273                  * Therefore, ignore file FAMCreated event here.
274                  * Also, if FAMNoExists() is used, might get spurious
275                  * FAMCreated events as changes are made e.g. in monitored
276                  * sub-sub-sub dirs and the library discovers new (already
277                  * existing) dir entries */
278                 continue;
279             case FAMChanged:
280                 /* file changed in monitored dir does not modify dir */
281             case FAMDeleted:
282             case FAMMoved:
283                 /* file deleted or moved in monitored dir modifies dir,
284                  * but FAM provides separate notification for that */
285 
286                 /* temporarily append filename to dir in fam_dir->name to
287                  * construct path, then delete stat_cache entry (if any)*/
288                 len = buffer_string_length(n);
289                 buffer_append_string_len(n, CONST_STR_LEN("/"));
290                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
291                 /* (alternatively, could chose to stat() and update)*/
292                 stat_cache_invalidate_entry(CONST_BUF_LEN(n));
293 
294                 fam_link = /*(check if might be symlink to monitored dir)*/
295                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
296                 if (fam_link && !buffer_is_equal(fam_link->name, n))
297                     fam_link = NULL;
298 
299                 buffer_string_set_length(n, len);
300 
301                 if (fam_link) {
302                     /* replaced symlink changes containing dir */
303                     stat_cache_invalidate_entry(CONST_BUF_LEN(n));
304                     /* handle symlink to dir as deleted dir below */
305                     fe.code = FAMDeleted;
306                     fam_dir = fam_link;
307                     break;
308                 }
309                 continue;
310             default:
311                 continue;
312             }
313         }
314 
315         switch(fe.code) {
316         case FAMChanged:
317             stat_cache_invalidate_entry(CONST_BUF_LEN(fam_dir->name));
318             break;
319         case FAMDeleted:
320         case FAMMoved:
321             stat_cache_delete_tree(CONST_BUF_LEN(fam_dir->name));
322             fam_dir_invalidate_node(fam_dir);
323             if (scf->dirs)
324                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
325             fam_dir_periodic_cleanup();
326             break;
327         default:
328             break;
329         }
330     }
331 }
332 
333 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
334 {
335 	stat_cache_fam * const scf = ctx; /* sc.scf */
336 
337 	if (revent & FDEVENT_IN) {
338 		stat_cache_handle_fdevent_in(scf);
339 	}
340 
341 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
342 		/* fam closed the connection */
343 		log_error(scf->errh, __FILE__, __LINE__,
344 		  "FAM connection closed; disabling stat_cache.");
345 		/* (although effectively STAT_CACHE_ENGINE_NONE,
346 		 *  do not change here so that periodic jobs clean up memory)*/
347 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
348 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
349 		fdevent_unregister(scf->ev, scf->fd);
350 		scf->fdn = NULL;
351 
352 		FAMClose(&scf->fam);
353 		scf->fd = -1;
354 	}
355 
356 	return HANDLER_GO_ON;
357 }
358 
359 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
360 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
361 	force_assert(scf);
362 	scf->fd = -1;
363 	scf->ev = ev;
364 	scf->errh = errh;
365 
366 	/* setup FAM */
367 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
368 		log_error(errh, __FILE__, __LINE__,
369 		  "could not open a fam connection, dying.");
370 		return NULL;
371 	}
372       #ifdef HAVE_FAMNOEXISTS
373       #ifdef LIGHTTPD_STATIC
374 	FAMNoExists(&scf->fam);
375       #else
376 	int (*FAMNoExists_fn)(FAMConnection *);
377 	FAMNoExists_fn =
378 	  (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
379 	if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
380       #endif
381       #endif
382 
383 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
384 	fdevent_setfd_cloexec(scf->fd);
385 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
386 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
387 
388 	return scf;
389 }
390 
391 static void stat_cache_free_fam(stat_cache_fam *scf) {
392 	if (NULL == scf) return;
393 
394 	while (scf->dirs) {
395 		/*(skip entry invalidation and FAMCancelMonitor())*/
396 		splay_tree *node = scf->dirs;
397 		fam_dir_entry_free((fam_dir_entry *)node->data);
398 		scf->dirs = splaytree_delete(scf->dirs, node->key);
399 	}
400 
401 	if (-1 != scf->fd) {
402 		/*scf->fdn already cleaned up in fdevent_free()*/
403 		FAMClose(&scf->fam);
404 		/*scf->fd = -1;*/
405 	}
406 
407 	free(scf);
408 }
409 
410 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
411 {
412     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
413     const int fn_is_dir = S_ISDIR(st->st_mode);
414     /*force_assert(0 != dirlen);*/
415     /*force_assert(fn[0] == '/');*/
416     /* consistency: ensure fn does not end in '/' unless root "/"
417      * FAM events will not end in '/', so easier to match this way */
418     if (fn[dirlen-1] == '/') --dirlen;
419     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
420     /* Note: paths are expected to be normalized before calling stat_cache,
421      * e.g. without repeated '/' */
422     if (!fn_is_dir) {
423         while (fn[--dirlen] != '/') ;
424         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
425     }
426     int dir_ndx = splaytree_djbhash(fn, dirlen);
427     fam_dir_entry *fam_dir = NULL;
428 
429     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
430     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
431         fam_dir = scf->dirs->data;
432         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
433             /* hash collision; preserve existing
434              * do not monitor new to avoid cache thrashing */
435             return NULL;
436         }
437         /* directory already registered */
438     }
439 
440     const time_t cur_ts = log_epoch_secs;
441     struct stat lst;
442     int ck_dir = fn_is_dir;
443     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
444         ck_dir = 1;
445         /*(temporarily modify fn)*/
446         fn[dirlen] = '\0';
447         if (0 != lstat(fn, &lst)) {
448             fn[dirlen] = '/';
449             return NULL;
450         }
451         if (!S_ISLNK(lst.st_mode)) {
452             st = &lst;
453         }
454         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
455             fn[dirlen] = '/';
456             return NULL;
457         }
458         fn[dirlen] = '/';
459     }
460 
461     int ck_lnk = (NULL == fam_dir);
462     if (ck_dir && NULL != fam_dir) {
463         /* check stat() matches device and inode, just in case an external event
464          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
465         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
466             ck_lnk = 1;
467             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
468              * fam_dir is not NULL and so splaytree_insert not called below)*/
469             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
470             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
471                 stat_cache_update_entry(fn, dirlen, st, NULL);
472             /*(must not delete tree since caller is holding a valid node)*/
473             stat_cache_invalidate_dir_tree(fn, dirlen);
474             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
475                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
476                                             &fam_dir->req,
477                                             (void *)(intptr_t)dir_ndx)) {
478                 fam_dir->stat_ts = 0; /* invalidate */
479                 return NULL;
480             }
481             fam_dir->st_dev = st->st_dev;
482             fam_dir->st_ino = st->st_ino;
483         }
484         fam_dir->stat_ts = cur_ts;
485     }
486 
487     if (NULL == fam_dir) {
488         fam_dir = fam_dir_entry_init(fn, dirlen);
489 
490         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
491                                      (void *)(intptr_t)dir_ndx)) {
492             log_error(scf->errh, __FILE__, __LINE__,
493               "monitoring dir failed: %s file: %s %s",
494               fam_dir->name->ptr, fn, FamErrlist[FAMErrno]);
495             fam_dir_entry_free(fam_dir);
496             return NULL;
497         }
498 
499         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
500         fam_dir->stat_ts= cur_ts;
501         fam_dir->st_dev = st->st_dev;
502         fam_dir->st_ino = st->st_ino;
503     }
504 
505     if (ck_lnk) {
506         if (fn_is_dir) {
507             /*(temporarily modify fn)*/
508             char e = fn[dirlen];
509             fn[dirlen] = '\0';
510             if (0 != lstat(fn, &lst)) {
511                 fn[dirlen] = e;
512                 return NULL;
513             }
514             fn[dirlen] = e;
515         }
516         if (fam_dir->fam_parent) {
517             --fam_dir->fam_parent->refcnt;
518             fam_dir->fam_parent = NULL;
519         }
520         if (S_ISLNK(lst.st_mode)) {
521             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
522         }
523     }
524 
525     ++fam_dir->refcnt;
526     return fam_dir;
527 }
528 
529 #endif
530 
531 
532 static stat_cache_entry * stat_cache_entry_init(void) {
533     stat_cache_entry *sce = calloc(1, sizeof(*sce));
534     force_assert(NULL != sce);
535     sce->fd = -1;
536     return sce;
537 }
538 
539 static void stat_cache_entry_free(void *data) {
540     stat_cache_entry *sce = data;
541     if (!sce) return;
542 
543   #ifdef HAVE_FAM_H
544     /*(decrement refcnt only;
545      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
546     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
547   #endif
548 
549     free(sce->name.ptr);
550     free(sce->etag.ptr);
551     if (sce->content_type.size) free(sce->content_type.ptr);
552     if (sce->fd >= 0) close(sce->fd);
553 
554     free(sce);
555 }
556 
557 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
558 
559 static const char *attrname = "Content-Type";
560 static char attrval[128];
561 static buffer attrb = { attrval, 0, 0 };
562 
563 static int stat_cache_attr_get(const char *name) {
564   #if defined(HAVE_XATTR)
565    #if defined(HAVE_SYS_XATTR_H)
566     ssize_t attrlen;
567     if (0 < (attrlen = getxattr(name, attrname,
568                                 attrval, sizeof(attrval)-1)))
569    #else
570     int attrlen = sizeof(attrval)-1;
571     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
572    #endif
573   #elif defined(HAVE_EXTATTR)
574     ssize_t attrlen;
575     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
576                                         attrval, sizeof(attrval)-1)))
577   #endif
578     {
579         attrval[attrlen] = '\0';
580         attrb.used = (uint32_t)(attrlen + 1);
581         return 1;
582     }
583     return 0;
584 }
585 
586 #endif
587 
588 int stat_cache_init(fdevents *ev, log_error_st *errh) {
589   #ifdef HAVE_FAM_H
590     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
591         sc.scf = stat_cache_init_fam(ev, errh);
592         if (NULL == sc.scf) return 0;
593     }
594   #else
595     UNUSED(ev);
596     UNUSED(errh);
597   #endif
598 
599     return 1;
600 }
601 
602 void stat_cache_free(void) {
603     splay_tree *sptree = sc.files;
604     while (sptree) {
605         stat_cache_entry_free(sptree->data);
606         sptree = splaytree_delete(sptree, sptree->key);
607     }
608     sc.files = NULL;
609 
610   #ifdef HAVE_FAM_H
611     stat_cache_free_fam(sc.scf);
612     sc.scf = NULL;
613   #endif
614 
615   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
616     attrname = "Content-Type";
617   #endif
618 
619     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
620 }
621 
622 void stat_cache_xattrname (const char *name) {
623   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
624     attrname = name;
625   #else
626     UNUSED(name);
627   #endif
628 }
629 
630 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
631     if (buffer_string_is_empty(stat_cache_string))
632         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
633     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
634         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
635 #ifdef HAVE_FAM_H
636     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
637         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
638 #endif
639     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable")))
640         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
641     else {
642         log_error(errh, __FILE__, __LINE__,
643           "server.stat-cache-engine can be one of \"disable\", \"simple\","
644 #ifdef HAVE_FAM_H
645           " \"fam\","
646 #endif
647           " but not: %s", stat_cache_string->ptr);
648         return -1;
649     }
650     return 0;
651 }
652 
653 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
654 {
655     const char * const end = name + nlen; /*(end of string)*/
656     const uint32_t used = mimetypes->used;
657     if (used < 16) {
658         for (uint32_t i = 0; i < used; ++i) {
659             /* suffix match */
660             const data_string *ds = (data_string *)mimetypes->data[i];
661             const size_t klen = buffer_string_length(&ds->key);
662             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
663                 return &ds->value;
664         }
665     }
666     else {
667         const char *s;
668         const data_string *ds;
669         if (nlen) {
670             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
671             if (*s == '/') ++s;
672         }
673         else {
674             s = name;
675         }
676         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
677         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
678         if (NULL != ds) return &ds->value;
679         while (++s < end) {
680             while (*s != '.' && ++s != end) ;
681             if (s == end) break;
682             /* search ".ext" then "ext" */
683             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
684             if (NULL != ds) return &ds->value;
685             /* repeat search without leading '.' to handle situation where
686              * admin configured mimetype.assign keys without leading '.' */
687             if (++s < end) {
688                 if (*s == '.') { --s; continue; }
689                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
690                 if (NULL != ds) return &ds->value;
691             }
692         }
693         /* search for ""; catchall */
694         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
695         if (NULL != ds) return &ds->value;
696     }
697 
698     return NULL;
699 }
700 
701 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
702 
703 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
704 {
705     return stat_cache_attr_get(name) ? &attrb : NULL;
706 }
707 
708 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
709 {
710     /*(invalid caching if user config has multiple, different
711      * r->conf.mimetypes for same extension (not expected))*/
712     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
713 
714     if (!S_ISREG(sce->st.st_mode)) return NULL;
715 
716     /* cache mimetype */
717     const buffer *mtype =
718       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
719     if (NULL == mtype)
720         mtype = stat_cache_mimetype_by_ext(mimetypes,CONST_BUF_LEN(&sce->name));
721     if (NULL != mtype) {
722         if (sce->content_type.size) {
723             buffer_copy_buffer(&sce->content_type, mtype);
724         }
725         else if (mtype == &attrb) {
726             sce->content_type.ptr = NULL;
727             buffer_copy_buffer(&sce->content_type, mtype);
728         }
729         else {
730             /*(copy pointers from mimetypes array; avoid allocation)*/
731             sce->content_type.ptr = mtype->ptr;
732             sce->content_type.used = mtype->used;
733             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
734         }
735     }
736     else
737         buffer_clear(&sce->content_type);
738 
739     return &sce->content_type;
740 }
741 
742 #else
743 
744 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
745 {
746     /*(invalid caching if user config has multiple, different
747      * r->conf.mimetypes for same extension (not expected))*/
748     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
749 
750     if (!S_ISREG(sce->st.st_mode)) return NULL;
751 
752     /* cache mimetype */
753     const buffer * const mtype =
754       stat_cache_mimetype_by_ext(mimetypes, CONST_BUF_LEN(&sce->name));
755     if (NULL != mtype) {
756         /*(copy pointers from mimetypes array; avoid allocation)*/
757         sce->content_type.ptr = mtype->ptr;
758         sce->content_type.used = mtype->used;
759         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
760     }
761     else
762         buffer_clear(&sce->content_type);
763 
764     return &sce->content_type;
765 }
766 
767 #endif
768 
769 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
770     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
771      * for same path (not expected, since etag flags should be by filesystem))*/
772     if (!buffer_string_is_empty(&sce->etag)) return &sce->etag;
773 
774     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
775         if (0 == flags) return NULL;
776         etag_create(&sce->etag, &sce->st, flags);
777         return &sce->etag;
778     }
779 
780     return NULL;
781 }
782 
783 __attribute_pure__
784 static int stat_cache_stat_eq(const struct stat * const sta, const struct stat * const stb) {
785     return
786       #ifdef st_mtime /* use high-precision timestamp if available */
787       #if defined(__APPLE__) && defined(__MACH__)
788         sta->st_mtimespec.tv_nsec == stb->st_mtimespec.tv_nsec
789       #else
790         sta->st_mtim.tv_nsec == stb->st_mtim.tv_nsec
791       #endif
792       #endif
793         && sta->st_mtime == stb->st_mtime
794         && sta->st_size  == stb->st_size
795         && sta->st_ino   == stb->st_ino
796         && sta->st_dev   == stb->st_dev;
797 }
798 
799 void stat_cache_update_entry(const char *name, uint32_t len,
800                              struct stat *st, buffer *etagb)
801 {
802     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
803     force_assert(0 != len);
804     if (name[len-1] == '/') { if (0 == --len) len = 1; }
805     splay_tree **sptree = &sc.files;
806     stat_cache_entry *sce =
807       stat_cache_sptree_find(sptree, name, len);
808     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
809         if (!stat_cache_stat_eq(&sce->st, st)) {
810             /* etagb might be NULL to clear etag (invalidate) */
811             buffer_copy_string_len(&sce->etag, CONST_BUF_LEN(etagb));
812           #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
813             buffer_clear(&sce->content_type);
814           #endif
815             if (sce->fd >= 0) {
816                 close(sce->fd);
817                 sce->fd = -1;
818             }
819             sce->st = *st;
820         }
821         sce->stat_ts = log_epoch_secs;
822     }
823 }
824 
825 void stat_cache_delete_entry(const char *name, uint32_t len)
826 {
827     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
828     force_assert(0 != len);
829     if (name[len-1] == '/') { if (0 == --len) len = 1; }
830     splay_tree **sptree = &sc.files;
831     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
832     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
833         stat_cache_entry_free(sce);
834         *sptree = splaytree_delete(*sptree, (*sptree)->key);
835     }
836 }
837 
838 void stat_cache_invalidate_entry(const char *name, uint32_t len)
839 {
840     splay_tree **sptree = &sc.files;
841     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
842     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
843         sce->stat_ts = 0;
844       #ifdef HAVE_FAM_H
845         if (sce->fam_dir != NULL) {
846             --((fam_dir_entry *)sce->fam_dir)->refcnt;
847             sce->fam_dir = NULL;
848         }
849       #endif
850     }
851 }
852 
853 #ifdef HAVE_FAM_H
854 
855 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
856                                                 const char *name, size_t len)
857 {
858     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
859     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
860 
861     buffer *b = &((stat_cache_entry *)t->data)->name;
862     size_t blen = buffer_string_length(b);
863     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
864         stat_cache_entry *sce = t->data;
865         sce->stat_ts = 0;
866         if (sce->fam_dir != NULL) {
867             --((fam_dir_entry *)sce->fam_dir)->refcnt;
868             sce->fam_dir = NULL;
869         }
870     }
871 }
872 
873 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
874 {
875     splay_tree * const sptree = sc.files;
876     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
877 }
878 
879 #endif
880 
881 /*
882  * walk though splay_tree and collect contents of dir tree.
883  * remove tagged entries in a second loop
884  */
885 
886 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
887                                     int *keys, int *ndx)
888 {
889     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
890     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
891     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
892     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
893 
894     buffer *b = &((stat_cache_entry *)t->data)->name;
895     size_t blen = buffer_string_length(b);
896     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
897         keys[(*ndx)++] = t->key;
898 }
899 
900 __attribute_noinline__
901 static void stat_cache_prune_dir_tree(const char *name, size_t len)
902 {
903     splay_tree *sptree = sc.files;
904     int max_ndx, i;
905     int keys[8192]; /* 32k size on stack */
906     do {
907         if (!sptree) break;
908         max_ndx = 0;
909         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
910         for (i = 0; i < max_ndx; ++i) {
911             const int ndx = keys[i];
912             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
913             if (node && node->key == ndx) {
914                 stat_cache_entry_free(node->data);
915                 sptree = splaytree_delete(sptree, ndx);
916             }
917         }
918     } while (max_ndx == sizeof(keys)/sizeof(int));
919     sc.files = sptree;
920 }
921 
922 static void stat_cache_delete_tree(const char *name, uint32_t len)
923 {
924     stat_cache_delete_entry(name, len);
925     stat_cache_prune_dir_tree(name, len);
926 }
927 
928 void stat_cache_delete_dir(const char *name, uint32_t len)
929 {
930     force_assert(0 != len);
931     if (name[len-1] == '/') { if (0 == --len) len = 1; }
932     stat_cache_delete_tree(name, len);
933   #ifdef HAVE_FAM_H
934     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
935         splay_tree **sptree = &sc.scf->dirs;
936         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
937         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
938             fam_dir_invalidate_node(fam_dir);
939         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
940         fam_dir_periodic_cleanup();
941     }
942   #endif
943 }
944 
945 /***
946  *
947  *
948  *
949  * returns:
950  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
951  *  - HANDLER_ERROR on stat() failed -> see errno for problem
952  */
953 
954 stat_cache_entry * stat_cache_get_entry(const buffer *name) {
955 	stat_cache_entry *sce = NULL;
956 	struct stat st;
957 	int file_ndx;
958 
959 	/* consistency: ensure lookup name does not end in '/' unless root "/"
960 	 * (but use full path given with stat(), even with trailing '/') */
961 	int final_slash = 0;
962 	size_t len = buffer_string_length(name);
963 	force_assert(0 != len);
964 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
965 	/* Note: paths are expected to be normalized before calling stat_cache,
966 	 * e.g. without repeated '/' */
967 
968 	if (name->ptr[0] != '/') {
969 		errno = EINVAL;
970 		return NULL;
971 	}
972 
973 	/*
974 	 * check if the directory for this file has changed
975 	 */
976 
977 	const time_t cur_ts = log_epoch_secs;
978 
979 	file_ndx = splaytree_djbhash(name->ptr, len);
980 	splay_tree * const sptree = sc.files = splaytree_splay(sc.files, file_ndx);
981 
982 	if (sptree && (sptree->key == file_ndx)) {
983 		/* we have seen this file already and
984 		 * don't stat() it again in the same second */
985 
986 		sce = sptree->data;
987 
988 		/* check if the name is the same, we might have a collision */
989 
990 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
991 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
992 				if (sce->stat_ts == cur_ts) {
993 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
994 						errno = ENOTDIR;
995 						return NULL;
996 					}
997 					return sce;
998 				}
999 			}
1000 		      #ifdef HAVE_FAM_H
1001 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
1002 				 && sce->fam_dir) { /* entry is in monitored dir */
1003 				/* re-stat() periodically, even if monitoring for changes
1004 				 * (due to limitations in stat_cache.c use of FAM)
1005 				 * (gaps due to not continually monitoring an entire tree) */
1006 				if (cur_ts - sce->stat_ts < 16) {
1007 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1008 						errno = ENOTDIR;
1009 						return NULL;
1010 					}
1011 					return sce;
1012 				}
1013 			}
1014 		      #endif
1015 		} else {
1016 			/* collision, forget about the entry */
1017 			sce = NULL;
1018 		}
1019 	}
1020 
1021 	if (-1 == stat(name->ptr, &st)) {
1022 		return NULL;
1023 	}
1024 
1025 	if (S_ISREG(st.st_mode)) {
1026 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1027 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
1028 			errno = ENOTDIR;
1029 			return NULL;
1030 		}
1031 	}
1032 
1033 	if (NULL == sce) {
1034 
1035 		sce = stat_cache_entry_init();
1036 		buffer_copy_string_len(&sce->name, name->ptr, len);
1037 
1038 		/* already splayed file_ndx */
1039 		if (NULL != sptree && sptree->key == file_ndx) {
1040 			/* hash collision: replace old entry */
1041 			stat_cache_entry_free(sptree->data);
1042 			sptree->data = sce;
1043 		} else {
1044 			sc.files = splaytree_insert(sptree, file_ndx, sce);
1045 		}
1046 
1047 	} else {
1048 
1049 		buffer_clear(&sce->etag);
1050 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1051 		buffer_clear(&sce->content_type);
1052 	      #endif
1053 
1054 	}
1055 
1056 	if (sce->fd >= 0) {
1057 		/* close fd if file changed */
1058 		if (!stat_cache_stat_eq(&sce->st, &st)) {
1059 			close(sce->fd);
1060 			sce->fd = -1;
1061 		}
1062 	}
1063 
1064 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1065 
1066 #ifdef HAVE_FAM_H
1067 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1068 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1069 		sce->fam_dir =
1070 		  fam_dir_monitor(sc.scf, CONST_BUF_LEN(name), &st);
1071 	      #if 0 /*(performed below)*/
1072 		if (NULL != sce->fam_dir) {
1073 			/*(may have been invalidated by dir change)*/
1074 			sce->stat_ts = cur_ts;
1075 		}
1076 	      #endif
1077 	}
1078 #endif
1079 
1080 	sce->stat_ts = cur_ts;
1081 	return sce;
1082 }
1083 
1084 stat_cache_entry * stat_cache_get_entry_open(const buffer * const name, const int symlinks) {
1085     stat_cache_entry * const sce = stat_cache_get_entry(name);
1086     if (NULL == sce) return NULL;
1087     if (sce->fd >= 0) return sce;
1088     if (sce->st.st_size > 0)
1089         sce->fd = stat_cache_open_rdonly_fstat(name, &sce->st, symlinks);
1090     return sce; /* (note: sce->fd might still be -1 if open() failed) */
1091 }
1092 
1093 int stat_cache_path_isdir(const buffer *name) {
1094     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1095     return (sce && (S_ISDIR(sce->st.st_mode) ? 1 : (errno = ENOTDIR, 0)));
1096 }
1097 
1098 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1099     /* caller should check for symlinks only if we should block symlinks. */
1100 
1101     /* catch the obvious symlinks
1102      *
1103      * this is not a secure check as we still have a race-condition between
1104      * the stat() and the open. We can only solve this by
1105      * 1. open() the file
1106      * 2. fstat() the fd
1107      *
1108      * and keeping the file open for the rest of the time. But this can
1109      * only be done at network level.
1110      * */
1111 
1112   #ifdef HAVE_LSTAT
1113     /* we assume "/" can not be symlink,
1114      * so skip the symlink stuff if path is "/" */
1115     size_t len = buffer_string_length(name);
1116     force_assert(0 != len);
1117     force_assert(name->ptr[0] == '/');
1118     if (1 == len) return 0;
1119    #ifndef PATH_MAX
1120    #define PATH_MAX 4096
1121    #endif
1122     if (len >= PATH_MAX) return -1;
1123 
1124     char buf[PATH_MAX];
1125     memcpy(buf, name->ptr, len);
1126     char *s_cur = buf+len;
1127     do {
1128         *s_cur = '\0';
1129         struct stat st;
1130         if (0 == lstat(buf, &st)) {
1131             if (S_ISLNK(st.st_mode)) return 1;
1132         }
1133         else {
1134             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1135             return -1;
1136         }
1137     } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1138   #endif
1139 
1140     return 0;
1141 }
1142 
1143 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1144 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1145 	 * not any intermediate symlinks along the path)*/
1146 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1147 	if (fd >= 0) {
1148 		if (0 == fstat(fd, st)) {
1149 			return fd;
1150 		} else {
1151 			const int errnum = errno;
1152 			close(fd);
1153 			errno = errnum;
1154 		}
1155 	}
1156 	return -1;
1157 }
1158 
1159 /**
1160  * remove stat() from cache which haven't been stat()ed for
1161  * more than 2 seconds
1162  *
1163  *
1164  * walk though the stat-cache, collect the ids which are too old
1165  * and remove them in a second loop
1166  */
1167 
1168 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const time_t cur_ts) {
1169     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1170     if (t->left)
1171         stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1172     if (t->right)
1173         stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1174     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1175 
1176     const stat_cache_entry * const sce = t->data;
1177     if (cur_ts - sce->stat_ts > max_age)
1178         keys[(*ndx)++] = t->key;
1179 }
1180 
1181 static void stat_cache_periodic_cleanup(const time_t max_age, const time_t cur_ts) {
1182     splay_tree *sptree = sc.files;
1183     int max_ndx, i;
1184     int keys[8192]; /* 32k size on stack */
1185     do {
1186         if (!sptree) break;
1187         max_ndx = 0;
1188         stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1189         for (i = 0; i < max_ndx; ++i) {
1190             int ndx = keys[i];
1191             sptree = splaytree_splay(sptree, ndx);
1192             if (sptree && sptree->key == ndx) {
1193                 stat_cache_entry_free(sptree->data);
1194                 sptree = splaytree_delete(sptree, ndx);
1195             }
1196         }
1197     } while (max_ndx == sizeof(keys)/sizeof(int));
1198     sc.files = sptree;
1199 }
1200 
1201 void stat_cache_trigger_cleanup(void) {
1202 	time_t max_age = 2;
1203 
1204       #ifdef HAVE_FAM_H
1205 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1206 		if (log_epoch_secs & 0x1F) return;
1207 		/* once every 32 seconds (0x1F == 31) */
1208 		max_age = 32;
1209 		fam_dir_periodic_cleanup();
1210 		/* By doing this before stat_cache_periodic_cleanup(),
1211 		 * entries used within the next max_age secs will remain
1212 		 * monitored, instead of effectively flushing and
1213 		 * rebuilding the FAM monitoring every max_age seconds */
1214 	}
1215       #endif
1216 
1217 	stat_cache_periodic_cleanup(max_age, log_epoch_secs);
1218 }
1219