xref: /lighttpd1.4/src/stat_cache.c (revision 520bffcd)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "etag.h"
7 #include "algo_splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
/* Selectable stat-cache engines.
 * The inotify and kqueue backends are accessed through the FAM code paths in
 * this file, so all three share a single value. */
enum {
    STAT_CACHE_ENGINE_SIMPLE  = 0, /*(default)*/
    STAT_CACHE_ENGINE_NONE    = 1,
    STAT_CACHE_ENGINE_FAM     = 2, /* same as STAT_CACHE_ENGINE_INOTIFY */
    STAT_CACHE_ENGINE_INOTIFY = 2, /* same as STAT_CACHE_ENGINE_FAM */
    STAT_CACHE_ENGINE_KQUEUE  = 2  /* same as STAT_CACHE_ENGINE_FAM */
};
48 
49 struct stat_cache_fam;  /* declaration */
50 
/* Module-wide stat cache state; this file keeps one static instance (sc). */
typedef struct stat_cache {
	/* engine selector, compared against the STAT_CACHE_ENGINE_* values */
	int stat_cache_engine;
	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
	/* directory-monitoring state; assigned in stat_cache_init() when the
	 * engine is STAT_CACHE_ENGINE_FAM, released in stat_cache_free() */
	struct stat_cache_fam *scf;
} stat_cache;
56 
57 static stat_cache sc;
58 
59 
60 static void * stat_cache_sptree_find(splay_tree ** const sptree,
61                                      const char * const name,
62                                      uint32_t len)
63 {
64     const int ndx = splaytree_djbhash(name, len);
65     *sptree = splaytree_splay(*sptree, ndx);
66     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
67 }
68 
69 
70 #if defined(HAVE_SYS_INOTIFY_H) \
71  || (defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
72 #ifndef HAVE_FAM_H
73 #define HAVE_FAM_H
74 #endif
75 #endif
76 
77 #ifdef HAVE_FAM_H
78 
79 /* monitor changes in directories using FAM
80  *
81  * This implementation employing FAM monitors directories as they are used,
82  * and maintains a reference count for cache use within stat_cache.c.
83  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
84  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
85  * within the cache are checked against the filesystem upon use if last stat()
86  * was greater than or equal to 16 seconds ago.
87  *
88  * This implementation does not monitor every directory in a tree, and therefore
89  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
90  * processing events from FAM might also lead to stale cache entries.
91  *
92  * For many websites, a large number of files are seldom, if ever, modified,
93  * and a common practice with images is to create a new file with a new name
94  * when a new version is needed, in order for client browsers and CDNs to better
95  * cache the content.  Given this, most use will see little difference in
96  * performance between server.stat-cache-engine = "fam" and "simple" (default).
97  * The default server.stat-cache-engine = "simple" calls stat() on a target once
98  * per second, and reuses that information until the next second.  For use where
99  * changes must be immediately visible, server.stat-cache-engine = "disable"
100  * should be used.
101  *
102  * When considering use of server.stat-cache-engine = "fam", there are a few
103  * additional limitations for this cache implementation using FAM.
104  * - symlinks to files located outside of the current directory do not result
105  *   in changes to that file being monitored (unless that file is in a directory
106  *   which is monitored as a result of a different request).  symlinks can be
107  *   chained and can be circular.  This implementation *does not* readlink() or
108  *   realpath() to resolve the chains to find and monitor the ultimate target
109  *   directory.  While symlinks to files located outside the current directory
110  *   are not monitored, symlinks to directories *are* monitored, though chains
111  *   of symlinks to directories do not result in monitoring of the directories
112  *   containing intermediate symlinks to the target directory.
113  * - directory rename of a directory which is not currently being monitored will
114  *   result in stale information in the cache if there is a subdirectory that is
115  *   being monitored.
116  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
117  * does re-validate the information in the cache upon use if the cache entry has
118  * not been checked in 16 seconds, so that is the upper limit for use of stale
119  * data.
120  *
121  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
122  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
123  * to the overhead of processing the additional noise generated from changes.
124  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
125  * untrusted files where a malicious user could generate an excess of change
126  * events.
127  *
128  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
129  * event is received for a directory (or symlink to a directory) which has been
130  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
 * changes of large directory trees where there have been a large number of
132  * different files recently accessed and part of the stat_cache.
133  */
134 
135 #ifdef HAVE_SYS_INOTIFY_H
136 
137 #include <sys/inotify.h>
138 
139 /*(translate FAM API to inotify; this is specific to stat_cache.c use of FAM)*/
140 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
141 typedef int FAMRequest; /*(fr)*/
/* FAMClose(): close the inotify descriptor that stands in for the FAM
 * connection */
#define FAMClose(fd) \
        close(*(fd))
/* FAMCancelMonitor(): remove watch descriptor *wd from inotify instance *fd */
#define FAMCancelMonitor(fd, wd) \
        inotify_rm_watch(*(fd), *(wd))
/* inotify events requested for each watched directory
 * (note: follows symlinks; not providing IN_DONT_FOLLOW)
 * (expansion is parenthesized so the mask stays a single operand when it is
 *  combined with operators that bind tighter than '|', such as '&' or '==') */
#define fam_watch_mask ( IN_ATTRIB | IN_CREATE | IN_DELETE | IN_DELETE_SELF \
                       | IN_MODIFY | IN_MOVE_SELF | IN_MOVED_FROM \
                       | IN_EXCL_UNLINK | IN_ONLYDIR )
/* FAMMonitorDirectory(): add a watch on directory fn and store the watch
 * descriptor in *wd.  Evaluates to non-zero when inotify_add_watch() returned
 * a negative value, zero otherwise.  userData is not used by this backend. */
#define FAMMonitorDirectory(fd, fn, wd, userData) \
        ((*(wd) = inotify_add_watch(*(fd), (fn), (fam_watch_mask))) < 0)
/* event codes passed to stat_cache_handle_fdevent_fn() */
typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
    FAMChanged=1,
    FAMDeleted=2,
    FAMCreated=5,
    FAMMoved=6,
} FAMCodes;
158 
159 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
160 
161 #include <sys/event.h>
162 #include <sys/time.h>
163 
/*(translate FAM API to kqueue; this is specific to stat_cache.c use of FAM)*/
165 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
166 typedef int FAMRequest; /*(fr)*/
167 #define FAMClose(fd) \
168         (-1 != (*(fd)) ? close(*(fd)) : 0)
169 static int FAMCancelMonitor (const int * const fd, int * const wd)
170 {
171     if (-1 == *fd) return 0;
172     if (-1 == *wd) return 0;
173     struct timespec t0 = { 0, 0 };
174     struct kevent kev;
175     EV_SET(&kev, *wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
176     int rc = kevent(*fd, &kev, 1, NULL, 0, &t0);
177     close(*wd);
178     *wd = -1;
179     return rc;
180 }
181 static int FAMMonitorDirectory (int * const fd, char * const fn, int * const wd, void * const userData)
182 {
183     *wd = fdevent_open_dirname(fn, 1); /*(note: follows symlinks)*/
184     if (-1 == *wd) return -1;
185     struct timespec t0 = { 0, 0 };
186     struct kevent kev;
187     unsigned short kev_flags = EV_ADD | EV_ENABLE | EV_CLEAR;
188     unsigned int kev_fflags = NOTE_ATTRIB | NOTE_EXTEND | NOTE_LINK | NOTE_WRITE
189                             | NOTE_DELETE | NOTE_REVOKE | NOTE_RENAME;
190     EV_SET(&kev, *wd, EVFILT_VNODE, kev_flags, kev_fflags, 0, userData);
191     return kevent(*fd, &kev, 1, NULL, 0, &t0);
192 }
/* event codes passed to stat_cache_handle_fdevent_fn() by the kqueue backend */
typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
    FAMChanged=1,
    FAMDeleted=2,
    FAMCreated=5,
    FAMMoved=6,
} FAMCodes;
199 
200 #else
201 
202 #include <fam.h>
203 
204 #ifdef HAVE_FAMNOEXISTS
205 #ifndef LIGHTTPD_STATIC
206 #include <dlfcn.h>
207 #endif
208 #endif
209 
210 #endif
211 
/* One monitored directory; stored as node data in stat_cache_fam.dirs */
typedef struct fam_dir_entry {
	/* directory path as passed to FAMMonitorDirectory() */
	buffer *name;
	/* reference count; entries found at 0 are removed by
	 * fam_dir_periodic_cleanup() */
	int refcnt;
	/* monitor handle: FAM request, inotify watch descriptor,
	 * or (kqueue) open directory fd; -1 when unset for kqueue */
	FAMRequest req;
	/* time of last stat() check of the directory; 0 marks it invalidated */
	time_t stat_ts;
	/* device and inode recorded for the directory; compared with a later
	 * stat() in fam_dir_monitor() to detect that the path now refers to
	 * something else */
	dev_t st_dev;
	ino_t st_ino;
	/* entry obtained from fam_dir_monitor() when this path is a symlink;
	 * one reference is held on it until fam_dir_invalidate_node() */
	struct fam_dir_entry *fam_parent;
} fam_dir_entry;
221 
/* State for directory monitoring (FAM, or inotify/kqueue via the FAM shim) */
typedef struct stat_cache_fam {
	/* (tree key is splaytree_djbhash() of the directory path) */
	splay_tree *dirs; /* indexed by path; node data is fam_dir_entry */
  #ifdef HAVE_SYS_INOTIFY_H
	splay_tree *wds;  /* indexed by inotify watch descriptor */
  #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
  #else
	FAMConnection fam;
  #endif
	log_error_st *errh; /* destination for error logging */
	fdevents *ev;       /* event loop the monitoring fd is registered with */
	fdnode *fdn;        /* registration of fd; NULL once connection closed */
	/* monitoring descriptor (inotify fd, kqueue fd, or FAM connection fd);
	 * -1 when not open.  With inotify/kqueue, "scf->fam" is #defined to
	 * refer to this member. */
	int fd;
} stat_cache_fam;
235 
236 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
237 {
238     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
239     force_assert(NULL != fam_dir);
240 
241     fam_dir->name = buffer_init();
242     buffer_copy_string_len(fam_dir->name, name, len);
243     fam_dir->refcnt = 0;
244   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
245     fam_dir->req = -1;
246   #endif
247 
248     return fam_dir;
249 }
250 
251 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
252 {
253     if (!fam_dir) return;
254     /*(fam_dir->parent might be invalid pointer here; ignore)*/
255     buffer_free(fam_dir->name);
256   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
257     if (-1 != fam_dir->req)
258         close(fam_dir->req);
259   #endif
260     free(fam_dir);
261 }
262 
263 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
264 {
265     fam_dir->stat_ts = 0;
266     if (fam_dir->fam_parent) {
267         --fam_dir->fam_parent->refcnt;
268         fam_dir->fam_parent = NULL;
269     }
270 }
271 
272 /*
 * walk through splay_tree and collect contents of dir tree.
274  * remove tagged entries in a second loop
275  */
276 
277 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
278 {
279     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
280     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
281     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
282     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
283 
284     fam_dir_entry * const fam_dir = t->data;
285     if (0 == fam_dir->refcnt) {
286         fam_dir_invalidate_node(fam_dir);
287         keys[(*ndx)++] = t->key;
288     }
289 }
290 
/* Remove unreferenced directory entries from sc.scf->dirs and stop
 * monitoring them.
 *
 * Each pass collects up to 512 keys of entries with refcnt == 0 using
 * fam_dir_tag_refcnt(), then deletes each from the tree, cancels its monitor
 * and frees the entry.  Another pass is made whenever a pass filled keys[]
 * completely, since further unreferenced entries may remain.
 */
__attribute_noinline__
static void fam_dir_periodic_cleanup() {
    stat_cache_fam * const scf = sc.scf;
    int max_ndx, i;
    int keys[512]; /* 2k size on stack */
  #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
    struct kevent kevl[512]; /* 32k size on stack to batch kevent EV_DELETE */
  #endif
    do {
        if (!scf->dirs) break;
        max_ndx = 0;
        /* first loop: tag (collect keys of) entries with refcnt == 0 */
        fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
        /* second loop: remove tagged entries */
        for (i = 0; i < max_ndx; ++i) {
            const int ndx = keys[i];
            splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
            if (node && node->key == ndx) {
                fam_dir_entry *fam_dir = node->data;
                scf->dirs = splaytree_delete(scf->dirs, ndx);
              #ifdef HAVE_SYS_INOTIFY_H
                /* drop the watch-descriptor index for this entry as well */
                scf->wds = splaytree_delete(scf->wds, fam_dir->req);
              #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
                /* batch process kevent removal; defer cancel */
                /* NOTE(review): kevl[i] is filled only when the key is found
                 * in the tree; the batch below submits all max_ndx slots, so
                 * this relies on every collected key being found */
                EV_SET(kevl+i, fam_dir->req, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
                fam_dir->req = -1; /*(make FAMCancelMonitor() a no-op)*/
              #endif
                FAMCancelMonitor(&scf->fam, &fam_dir->req);
                fam_dir_entry_free(fam_dir);
            }
        }
      #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
        /* batch process: kevent() to submit EV_DELETE, then close dir fds */
        if (0 == max_ndx) break;
        struct timespec t0 = { 0, 0 };
        kevent(scf->fd, kevl, max_ndx, NULL, 0, &t0);
        /* (fds were saved in kevl[].ident before req was reset to -1) */
        for (i = 0; i < max_ndx; ++i)
            close((int)kevl[i].ident);
      #endif
    } while (max_ndx == sizeof(keys)/sizeof(int));
}
330 
331 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
332 {
333   #ifdef __clang_analyzer__
334     force_assert(name);
335   #endif
336     /*force_assert(t);*/
337     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
338     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
339 
340     fam_dir_entry * const fam_dir = t->data;
341   #ifdef __clang_analyzer__
342     force_assert(fam_dir);
343   #endif
344     buffer *b = fam_dir->name;
345     size_t blen = buffer_string_length(b);
346     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
347         fam_dir_invalidate_node(fam_dir);
348 }
349 
350 /* declarations */
351 static void stat_cache_delete_tree(const char *name, uint32_t len);
352 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
353 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry * const fam_dir, const char * const fn, const uint32_t fnlen, int code);
354 
/* Drain pending change notifications from the monitoring backend and hand
 * each relevant one to stat_cache_handle_fdevent_fn().
 *
 * One of three implementations is compiled in: inotify, kqueue, or FAM.
 * In each, events which cannot be matched to a currently-monitored
 * fam_dir_entry are skipped.
 */
static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
{
  #ifdef HAVE_SYS_INOTIFY_H
    /*(inotify pads in->len to align struct following in->name[])*/
    char buf[4096]
      __attribute__ ((__aligned__(__alignof__(struct inotify_event))));
    int rd;
    do {
        rd = (int)read(scf->fd, buf, sizeof(buf));
        if (rd <= 0) {
            /* EINTR and EAGAIN are not logged; any result <= 0 ends draining */
            if (-1 == rd && errno != EINTR && errno != EAGAIN) {
                log_perror(scf->errh, __FILE__, __LINE__, "inotify error");
                /* TODO: could flush cache, close scf->fd, and re-open inotify*/
            }
            break;
        }
        /* walk the variable-length event records packed into buf */
        for (int i = 0; i < rd; ) {
            struct inotify_event * const in =
              (struct inotify_event *)((uintptr_t)buf + i);
            /* advance to next record before any 'continue' below */
            i += sizeof(struct inotify_event) + in->len;
            if (in->mask & IN_CREATE)
                continue; /*(see comment below for FAMCreated)*/
            if (in->mask & IN_Q_OVERFLOW) {
                log_error(scf->errh, __FILE__, __LINE__,
                          "inotify queue overflow");
                continue;
            }
            /* ignore events which may have been pending for
             * paths recently cancelled via FAMCancelMonitor() */
            scf->wds = splaytree_splay(scf->wds, in->wd);
            if (!scf->wds || scf->wds->key != in->wd)
                continue;
            fam_dir_entry *fam_dir = scf->wds->data;
            if (NULL == fam_dir)        /*(should not happen)*/
                continue;
            if (fam_dir->req != in->wd) /*(should not happen)*/
                continue;
            /*(specific to use here in stat_cache.c)*/
            /* translate inotify mask into a FAM event code (0 if no match) */
            int code = 0;
            if (in->mask & (IN_ATTRIB | IN_MODIFY))
                code = FAMChanged;
            else if (in->mask & (IN_DELETE | IN_DELETE_SELF | IN_UNMOUNT))
                code = FAMDeleted;
            else if (in->mask & (IN_MOVE_SELF | IN_MOVED_FROM))
                code = FAMMoved;

            /* reduce in->len to exclude the trailing '\0' bytes in name[] */
            if (in->len) {
                do { --in->len; } while (in->len && in->name[in->len-1]=='\0');
            }
            stat_cache_handle_fdevent_fn(scf, fam_dir, in->name, in->len, code);
        }
    /* read again if the space left unused in buf by the last read was smaller
     * than one event record carrying a NAME_MAX-length name */
    } while (rd + sizeof(struct inotify_event) + NAME_MAX + 1 > sizeof(buf));
  #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
    struct kevent kevl[256];
    struct timespec t0 = { 0, 0 }; /* zero timeout passed to kevent() */
    int n;
    do {
        n = kevent(scf->fd, NULL, 0, kevl, sizeof(kevl)/sizeof(*kevl), &t0);
        if (n <= 0) break;
        for (int i = 0; i < n; ++i) {
            const struct kevent * const kev = kevl+i;
            /* ignore events which may have been pending for
             * paths recently cancelled via FAMCancelMonitor() */
            /* udata is the scf->dirs key given to FAMMonitorDirectory() */
            int ndx = (int)(intptr_t)kev->udata;
            scf->dirs = splaytree_splay(scf->dirs, ndx);
            if (!scf->dirs || scf->dirs->key != ndx)
                continue;
            fam_dir_entry *fam_dir = scf->dirs->data;
            /* event must be for the directory fd currently held by entry */
            if (fam_dir->req != (int)kev->ident)
                continue;
            /*(specific to use here in stat_cache.c)*/
            /* note: stat_cache only monitors on directories,
             *       so events here are only on directories
             * note: changes are treated as FAMDeleted since
             *       it is unknown which file in dir was changed
             *       This is not efficient, but this stat_cache mechanism also
             *       should not be used on frequently modified directories. */
            int code = 0;
            if (kev->fflags & (NOTE_WRITE|NOTE_ATTRIB|NOTE_EXTEND|NOTE_LINK))
                code = FAMDeleted; /*(not FAMChanged; see comment above)*/
            else if (kev->fflags & (NOTE_DELETE|NOTE_REVOKE))
                code = FAMDeleted;
            else if (kev->fflags & NOTE_RENAME)
                code = FAMMoved;
            if (kev->flags & EV_ERROR) /*(not expected; treat as FAMDeleted)*/
                code = FAMDeleted;
            /* no filename is available: event applies to the directory */
            stat_cache_handle_fdevent_fn(scf, fam_dir, NULL, 0, code);
        }
    /* a completely filled kevl[] suggests more events may be pending */
    } while (n == sizeof(kevl)/sizeof(*kevl));
  #else
    /* i counts down the events reported pending by FAMPending();
     * once it reaches 0, FAMPending() is queried again */
    for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
        FAMEvent fe;
        if (FAMNextEvent(&scf->fam, &fe) < 0) break;

        /* ignore events which may have been pending for
         * paths recently cancelled via FAMCancelMonitor() */
        /* userdata is the scf->dirs key given to FAMMonitorDirectory() */
        ndx = (int)(intptr_t)fe.userdata;
        scf->dirs = splaytree_splay(scf->dirs, ndx);
        if (!scf->dirs || scf->dirs->key != ndx) {
            continue;
        }
        fam_dir_entry *fam_dir = scf->dirs->data;
        if (FAMREQUEST_GETREQNUM(&fam_dir->req)
            != FAMREQUEST_GETREQNUM(&fe.fr)) {
            continue;
        }

        /* fnlen 0 (FAMCreated, or filename beginning with '/') makes the
         * handler act on the directory entry rather than on a file in it */
        uint32_t fnlen = (fe.code != FAMCreated && fe.filename[0] != '/')
          ? (uint32_t)strlen(fe.filename)
          : 0;
        stat_cache_handle_fdevent_fn(scf, fam_dir, fe.filename, fnlen, fe.code);
    }
  #endif
}
469 
/* Apply one change notification to the caches.
 *
 * scf      directory-monitoring state
 * fam_dir  entry of the monitored directory the event was reported for
 * fn,fnlen name of the affected file within that directory; fnlen == 0
 *          means the event concerns the directory itself (fn is then unused)
 * code     FAMChanged, FAMDeleted, FAMMoved or FAMCreated; other values
 *          are ignored
 *
 * With a filename, the cache entry for "<dir>/<fn>" is invalidated.  If that
 * path is itself a monitored directory (e.g. a symlink to a directory), the
 * event is then handled as deletion of that directory.  Without a filename,
 * FAMChanged invalidates the cache entry of the directory, while FAMDeleted
 * and FAMMoved delete the cached tree below it, invalidate the monitored
 * entries below it, and run fam_dir_periodic_cleanup().
 */
static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry *fam_dir, const char * const fn, const uint32_t fnlen, int code)
{
        if (fnlen) {
            buffer * const n = fam_dir->name;
            fam_dir_entry *fam_link;
            uint32_t len;
            switch (code) {
            case FAMCreated:
                /* file created in monitored dir modifies dir and
                 * we should get a separate FAMChanged event for dir.
                 * Therefore, ignore file FAMCreated event here.
                 * Also, if FAMNoExists() is used, might get spurious
                 * FAMCreated events as changes are made e.g. in monitored
                 * sub-sub-sub dirs and the library discovers new (already
                 * existing) dir entries */
                return;
            case FAMChanged:
                /* file changed in monitored dir does not modify dir */
                /* fallthrough */
            case FAMDeleted:
            case FAMMoved:
                /* file deleted or moved in monitored dir modifies dir,
                 * but FAM provides separate notification for that */

                /* temporarily append filename to dir in fam_dir->name to
                 * construct path, then delete stat_cache entry (if any)*/
                len = buffer_string_length(n);
                buffer_append_string_len(n, CONST_STR_LEN("/"));
                buffer_append_string_len(n, fn, fnlen);
                /* (alternatively, could chose to stat() and update)*/
                stat_cache_invalidate_entry(CONST_BUF_LEN(n));

                fam_link = /*(check if might be symlink to monitored dir)*/
                  stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
                /* lookup is by hash only; require the exact path to match */
                if (fam_link && !buffer_is_equal(fam_link->name, n))
                    fam_link = NULL;

                /* restore fam_dir->name to the directory path */
                buffer_string_set_length(n, len);

                if (fam_link) {
                    /* replaced symlink changes containing dir */
                    stat_cache_invalidate_entry(CONST_BUF_LEN(n));
                    /* handle symlink to dir as deleted dir below */
                    code = FAMDeleted;
                    fam_dir = fam_link;
                    break;
                }
                return;
            default:
                return;
            }
        }

        /* event concerns the directory in fam_dir itself */
        switch(code) {
        case FAMChanged:
            stat_cache_invalidate_entry(CONST_BUF_LEN(fam_dir->name));
            break;
        case FAMDeleted:
        case FAMMoved:
            stat_cache_delete_tree(CONST_BUF_LEN(fam_dir->name));
            fam_dir_invalidate_node(fam_dir);
            if (scf->dirs)
                fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
            /* remove entries which are no longer referenced */
            fam_dir_periodic_cleanup();
            break;
        default:
            break;
        }
}
538 
539 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
540 {
541 	stat_cache_fam * const scf = ctx; /* sc.scf */
542 
543 	if (revent & FDEVENT_IN) {
544 		stat_cache_handle_fdevent_in(scf);
545 	}
546 
547 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
548 		/* fam closed the connection */
549 		log_error(scf->errh, __FILE__, __LINE__,
550 		  "FAM connection closed; disabling stat_cache.");
551 		/* (although effectively STAT_CACHE_ENGINE_NONE,
552 		 *  do not change here so that periodic jobs clean up memory)*/
553 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
554 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
555 		fdevent_unregister(scf->ev, scf->fd);
556 		scf->fdn = NULL;
557 
558 		FAMClose(&scf->fam);
559 		scf->fd = -1;
560 	}
561 
562 	return HANDLER_GO_ON;
563 }
564 
565 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
566 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
567 	force_assert(scf);
568 	scf->fd = -1;
569 	scf->ev = ev;
570 	scf->errh = errh;
571 
572   #ifdef HAVE_SYS_INOTIFY_H
573 	scf->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
574 	if (scf->fd < 0) {
575 		log_perror(errh, __FILE__, __LINE__, "inotify_init1()");
576 		return NULL;
577 	}
578   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
579    #ifdef __NetBSD__
580 	scf->fd = kqueue1(O_NONBLOCK|O_CLOEXEC|O_NOSIGPIPE);
581    #else
582 	scf->fd = kqueue();
583 	if (scf->fd >= 0) fdevent_setfd_cloexec(scf->fd);
584    #endif
585 	if (scf->fd < 0) {
586 		log_perror(errh, __FILE__, __LINE__, "kqueue()");
587 		return NULL;
588 	}
589   #else
590 	/* setup FAM */
591 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
592 		log_error(errh, __FILE__, __LINE__,
593 		  "could not open a fam connection, dying.");
594 		return NULL;
595 	}
596       #ifdef HAVE_FAMNOEXISTS
597       #ifdef LIGHTTPD_STATIC
598 	FAMNoExists(&scf->fam);
599       #else
600 	int (*FAMNoExists_fn)(FAMConnection *);
601 	FAMNoExists_fn =
602 	  (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
603 	if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
604       #endif
605       #endif
606 
607 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
608 	fdevent_setfd_cloexec(scf->fd);
609   #endif
610 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
611 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
612 
613 	return scf;
614 }
615 
616 static void stat_cache_free_fam(stat_cache_fam *scf) {
617 	if (NULL == scf) return;
618 
619       #ifdef HAVE_SYS_INOTIFY_H
620 	while (scf->wds) {
621 		splay_tree *node = scf->wds;
622 		scf->wds = splaytree_delete(scf->wds, node->key);
623 	}
624       #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
625 	/*(quicker cleanup to close kqueue() before cancel per entry)*/
626 	close(scf->fd);
627 	scf->fd = -1;
628       #endif
629 	while (scf->dirs) {
630 		/*(skip entry invalidation and FAMCancelMonitor())*/
631 		splay_tree *node = scf->dirs;
632 		fam_dir_entry_free((fam_dir_entry *)node->data);
633 		scf->dirs = splaytree_delete(scf->dirs, node->key);
634 	}
635 
636 	if (-1 != scf->fd) {
637 		/*scf->fdn already cleaned up in fdevent_free()*/
638 		FAMClose(&scf->fam);
639 		/*scf->fd = -1;*/
640 	}
641 
642 	free(scf);
643 }
644 
/* Ensure the directory relevant to path fn is monitored and take a reference
 * on its fam_dir_entry.
 *
 * scf     directory-monitoring state
 * fn      path; if st describes a directory, fn itself is monitored,
 *         otherwise the directory containing fn.  fn is modified temporarily
 *         (a '\0' is written at the end of the directory part) and restored
 *         before returning.
 * dirlen  length of fn
 * st      stat information for fn.  Note: when fn is not a directory and
 *         its containing directory is a symlink, *st is overwritten with the
 *         stat() of that directory.
 *
 * Returns the entry with refcnt incremented, or NULL if monitoring is not
 * active, on hash collision with a different path, or on lstat()/stat()/
 * monitor failure.
 */
static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
{
    if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
    const int fn_is_dir = S_ISDIR(st->st_mode);
    /*force_assert(0 != dirlen);*/
    /*force_assert(fn[0] == '/');*/
    /* consistency: ensure fn does not end in '/' unless root "/"
     * FAM events will not end in '/', so easier to match this way */
    if (fn[dirlen-1] == '/') --dirlen;
    if (0 == dirlen) dirlen = 1; /* root dir ("/") */
    /* Note: paths are expected to be normalized before calling stat_cache,
     * e.g. without repeated '/' */
    if (!fn_is_dir) {
        /* shorten dirlen to the containing directory (up to last '/') */
        while (fn[--dirlen] != '/') ;
        if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
    }
    int dir_ndx = splaytree_djbhash(fn, dirlen);
    fam_dir_entry *fam_dir = NULL;

    /* look for an existing entry for this directory */
    scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
    if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
        fam_dir = scf->dirs->data;
        if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
            /* hash collision; preserve existing
             * do not monitor new to avoid cache thrashing */
            return NULL;
        }
        /* directory already registered */
    }

    const time_t cur_ts = log_epoch_secs;
    struct stat lst;
    int ck_dir = fn_is_dir;
    /* for a non-directory fn, (re)stat the containing directory if it is
     * not yet monitored or was last checked 16 or more seconds ago */
    if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
        ck_dir = 1;
        /*(temporarily modify fn)*/
        fn[dirlen] = '\0';
        if (0 != lstat(fn, &lst)) {
            fn[dirlen] = '/';
            return NULL;
        }
        if (!S_ISLNK(lst.st_mode)) {
            st = &lst;
        }
        else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
            fn[dirlen] = '/';
            return NULL;
        }
        fn[dirlen] = '/';
    }

    /* ck_lnk: whether to (re)evaluate if the directory path is a symlink;
     * always done for a new entry */
    int ck_lnk = (NULL == fam_dir);
    if (ck_dir && NULL != fam_dir) {
        /* check stat() matches device and inode, just in case an external event
         * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
        if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
            ck_lnk = 1;
            /*(modifies scf->dirs but no need to re-splay for dir_ndx since
             * fam_dir is not NULL and so splaytree_insert not called below)*/
            if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
            if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
                stat_cache_update_entry(fn, dirlen, st, NULL);
            /*(must not delete tree since caller is holding a valid node)*/
            stat_cache_invalidate_dir_tree(fn, dirlen);
          #ifdef HAVE_SYS_INOTIFY_H
            scf->wds = splaytree_delete(scf->wds, fam_dir->req);
          #endif
            /* replace the monitor: cancel the old one, then start a new one
             * on the same path */
            if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
                || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
                                            &fam_dir->req,
                                            (void *)(intptr_t)dir_ndx)) {
                fam_dir->stat_ts = 0; /* invalidate */
                return NULL;
            }
            fam_dir->st_dev = st->st_dev;
            fam_dir->st_ino = st->st_ino;
          #ifdef HAVE_SYS_INOTIFY_H
            scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
          #endif
        }
        fam_dir->stat_ts = cur_ts;
    }

    /* directory not yet monitored: create entry and start monitoring */
    if (NULL == fam_dir) {
        fam_dir = fam_dir_entry_init(fn, dirlen);

        if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
                                     (void *)(intptr_t)dir_ndx)) {
          #if defined(HAVE_SYS_INOTIFY_H) \
           || (defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE)
            log_perror(scf->errh, __FILE__, __LINE__,
              "monitoring dir failed: %s file: %s",
              fam_dir->name->ptr, fn);
          #else
            log_error(scf->errh, __FILE__, __LINE__,
              "monitoring dir failed: %s file: %s %s",
              fam_dir->name->ptr, fn, FamErrlist[FAMErrno]);
          #endif
            fam_dir_entry_free(fam_dir);
            return NULL;
        }

        scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
      #ifdef HAVE_SYS_INOTIFY_H
        scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
      #endif
        fam_dir->stat_ts= cur_ts;
        fam_dir->st_dev = st->st_dev;
        fam_dir->st_ino = st->st_ino;
    }

    if (ck_lnk) {
        /* (when fn is not a directory, lst was already filled by the lstat()
         *  of the containing directory above) */
        if (fn_is_dir) {
            /*(temporarily modify fn)*/
            char e = fn[dirlen];
            fn[dirlen] = '\0';
            if (0 != lstat(fn, &lst)) {
                fn[dirlen] = e;
                return NULL;
            }
            fn[dirlen] = e;
        }
        /* release the reference held on any previous parent entry */
        if (fam_dir->fam_parent) {
            --fam_dir->fam_parent->refcnt;
            fam_dir->fam_parent = NULL;
        }
        /* directory path is a symlink: recurse with the lstat() result, which
         * is not a directory, so the directory containing the symlink gets
         * monitored and referenced via fam_parent */
        if (S_ISLNK(lst.st_mode)) {
            fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
        }
    }

    ++fam_dir->refcnt;
    return fam_dir;
}
779 
780 #endif
781 
782 
783 static stat_cache_entry * stat_cache_entry_init(void) {
784     stat_cache_entry *sce = calloc(1, sizeof(*sce));
785     force_assert(NULL != sce);
786     sce->fd = -1;
787     sce->refcnt = 1;
788     return sce;
789 }
790 
791 static void stat_cache_entry_free(void *data) {
792     stat_cache_entry *sce = data;
793     if (!sce) return;
794 
795     if (--sce->refcnt) return;
796 
797   #ifdef HAVE_FAM_H
798     /*(decrement refcnt only;
799      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
800     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
801   #endif
802 
803     free(sce->name.ptr);
804     free(sce->etag.ptr);
805     if (sce->content_type.size) free(sce->content_type.ptr);
806     if (sce->fd >= 0) close(sce->fd);
807 
808     free(sce);
809 }
810 
811 void stat_cache_entry_refchg(void *data, int mod) {
812     /*(expect mod == -1 or mod == 1)*/
813     stat_cache_entry * const sce = data;
814     if (mod < 0 && 1 == sce->refcnt)
815         stat_cache_entry_free(data);
816     else
817         sce->refcnt += mod;
818 }
819 
820 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
821 
822 static const char *attrname = "Content-Type";
823 static char attrval[128];
824 static buffer attrb = { attrval, 0, 0 };
825 
826 static int stat_cache_attr_get(const char *name) {
827   #if defined(HAVE_XATTR)
828    #if defined(HAVE_SYS_XATTR_H)
829     ssize_t attrlen;
830     if (0 < (attrlen = getxattr(name, attrname,
831                                 attrval, sizeof(attrval)-1)))
832    #else
833     int attrlen = sizeof(attrval)-1;
834     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
835    #endif
836   #elif defined(HAVE_EXTATTR)
837     ssize_t attrlen;
838     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
839                                         attrval, sizeof(attrval)-1)))
840   #endif
841     {
842         attrval[attrlen] = '\0';
843         attrb.used = (uint32_t)(attrlen + 1);
844         return 1;
845     }
846     return 0;
847 }
848 
849 #endif
850 
851 int stat_cache_init(fdevents *ev, log_error_st *errh) {
852   #ifdef HAVE_FAM_H
853     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
854         sc.scf = stat_cache_init_fam(ev, errh);
855         if (NULL == sc.scf) return 0;
856     }
857   #else
858     UNUSED(ev);
859     UNUSED(errh);
860   #endif
861 
862     return 1;
863 }
864 
865 void stat_cache_free(void) {
866     splay_tree *sptree = sc.files;
867     while (sptree) {
868         stat_cache_entry_free(sptree->data);
869         sptree = splaytree_delete(sptree, sptree->key);
870     }
871     sc.files = NULL;
872 
873   #ifdef HAVE_FAM_H
874     stat_cache_free_fam(sc.scf);
875     sc.scf = NULL;
876   #endif
877 
878   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
879     attrname = "Content-Type";
880   #endif
881 
882     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
883 }
884 
/* set the extended attribute name consulted for the mimetype
 * (the pointer is stored as-is, not copied);
 * no-op when built without extended attribute support */
void stat_cache_xattrname (const char *name) {
  #if !defined(HAVE_XATTR) && !defined(HAVE_EXTATTR)
    UNUSED(name);
  #else
    attrname = name;
  #endif
}
892 
893 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
894     if (buffer_string_is_empty(stat_cache_string))
895         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
896     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
897         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
898 #ifdef HAVE_SYS_INOTIFY_H
899     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("inotify")))
900         sc.stat_cache_engine = STAT_CACHE_ENGINE_INOTIFY;
901         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_INOTIFY)*/
902 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
903     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("kqueue")))
904         sc.stat_cache_engine = STAT_CACHE_ENGINE_KQUEUE;
905         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_KQUEUE)*/
906 #endif
907 #ifdef HAVE_FAM_H
908     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
909         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
910 #endif
911     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable")))
912         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
913     else {
914         log_error(errh, __FILE__, __LINE__,
915           "server.stat-cache-engine can be one of \"disable\", \"simple\","
916 #ifdef HAVE_SYS_INOTIFY_H
917           " \"inotify\","
918 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
919           " \"kqueue\","
920 #endif
921 #ifdef HAVE_FAM_H
922           " \"fam\","
923 #endif
924           " but not: %s", stat_cache_string->ptr);
925         return -1;
926     }
927     return 0;
928 }
929 
930 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
931 {
932     const char * const end = name + nlen; /*(end of string)*/
933     const uint32_t used = mimetypes->used;
934     if (used < 16) {
935         for (uint32_t i = 0; i < used; ++i) {
936             /* suffix match */
937             const data_string *ds = (data_string *)mimetypes->data[i];
938             const size_t klen = buffer_string_length(&ds->key);
939             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
940                 return &ds->value;
941         }
942     }
943     else {
944         const char *s;
945         const data_string *ds;
946         if (nlen) {
947             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
948             if (*s == '/') ++s;
949         }
950         else {
951             s = name;
952         }
953         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
954         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
955         if (NULL != ds) return &ds->value;
956         while (++s < end) {
957             while (*s != '.' && ++s != end) ;
958             if (s == end) break;
959             /* search ".ext" then "ext" */
960             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
961             if (NULL != ds) return &ds->value;
962             /* repeat search without leading '.' to handle situation where
963              * admin configured mimetype.assign keys without leading '.' */
964             if (++s < end) {
965                 if (*s == '.') { --s; continue; }
966                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
967                 if (NULL != ds) return &ds->value;
968             }
969         }
970         /* search for ""; catchall */
971         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
972         if (NULL != ds) return &ds->value;
973     }
974 
975     return NULL;
976 }
977 
978 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
979 
980 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
981 {
982     return stat_cache_attr_get(name) ? &attrb : NULL;
983 }
984 
985 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
986 {
987     /*(invalid caching if user config has multiple, different
988      * r->conf.mimetypes for same extension (not expected))*/
989     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
990 
991     if (!S_ISREG(sce->st.st_mode)) return NULL;
992 
993     /* cache mimetype */
994     const buffer *mtype =
995       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
996     if (NULL == mtype)
997         mtype = stat_cache_mimetype_by_ext(mimetypes,CONST_BUF_LEN(&sce->name));
998     if (NULL != mtype) {
999         if (sce->content_type.size) {
1000             buffer_copy_buffer(&sce->content_type, mtype);
1001         }
1002         else if (mtype == &attrb) {
1003             sce->content_type.ptr = NULL;
1004             buffer_copy_buffer(&sce->content_type, mtype);
1005         }
1006         else {
1007             /*(copy pointers from mimetypes array; avoid allocation)*/
1008             sce->content_type.ptr = mtype->ptr;
1009             sce->content_type.used = mtype->used;
1010             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1011         }
1012     }
1013     else
1014         buffer_clear(&sce->content_type);
1015 
1016     return &sce->content_type;
1017 }
1018 
1019 #else
1020 
1021 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
1022 {
1023     /*(invalid caching if user config has multiple, different
1024      * r->conf.mimetypes for same extension (not expected))*/
1025     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
1026 
1027     if (!S_ISREG(sce->st.st_mode)) return NULL;
1028 
1029     /* cache mimetype */
1030     const buffer * const mtype =
1031       stat_cache_mimetype_by_ext(mimetypes, CONST_BUF_LEN(&sce->name));
1032     if (NULL != mtype) {
1033         /*(copy pointers from mimetypes array; avoid allocation)*/
1034         sce->content_type.ptr = mtype->ptr;
1035         sce->content_type.used = mtype->used;
1036         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1037     }
1038     else
1039         buffer_clear(&sce->content_type);
1040 
1041     return &sce->content_type;
1042 }
1043 
1044 #endif
1045 
1046 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
1047     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
1048      * for same path (not expected, since etag flags should be by filesystem))*/
1049     if (!buffer_string_is_empty(&sce->etag)) return &sce->etag;
1050 
1051     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
1052         if (0 == flags) return NULL;
1053         etag_create(&sce->etag, &sce->st, flags);
1054         return &sce->etag;
1055     }
1056 
1057     return NULL;
1058 }
1059 
/* compare the stat fields used to detect that a file changed:
 * modification time (including nanoseconds where struct stat provides
 * them), size, inode and device
 * returns non-zero if all compared fields are equal, 0 otherwise */
__attribute_pure__
static int stat_cache_stat_eq(const struct stat * const sta, const struct stat * const stb) {
    return
      #ifdef st_mtime /* use high-precision timestamp if available */
      #if defined(__APPLE__) && defined(__MACH__)
        sta->st_mtimespec.tv_nsec == stb->st_mtimespec.tv_nsec
      #else
        sta->st_mtim.tv_nsec == stb->st_mtim.tv_nsec
      #endif
      #else
        /*(no high-precision timestamp; operand required so that the
         * expression below does not begin with '&&')*/
        1
      #endif
        && sta->st_mtime == stb->st_mtime
        && sta->st_size  == stb->st_size
        && sta->st_ino   == stb->st_ino
        && sta->st_dev   == stb->st_dev;
}
1075 
1076 void stat_cache_update_entry(const char *name, uint32_t len,
1077                              struct stat *st, buffer *etagb)
1078 {
1079     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1080     force_assert(0 != len);
1081     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1082     splay_tree **sptree = &sc.files;
1083     stat_cache_entry *sce =
1084       stat_cache_sptree_find(sptree, name, len);
1085     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1086         if (!stat_cache_stat_eq(&sce->st, st)) {
1087             /* etagb might be NULL to clear etag (invalidate) */
1088             buffer_copy_string_len(&sce->etag, CONST_BUF_LEN(etagb));
1089           #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1090             buffer_clear(&sce->content_type);
1091           #endif
1092             if (sce->fd >= 0) {
1093                 if (1 == sce->refcnt) {
1094                     close(sce->fd);
1095                     sce->fd = -1;
1096                 }
1097                 else {
1098                     --sce->refcnt; /* stat_cache_entry_free(sce); */
1099                     (*sptree)->data = sce = stat_cache_entry_init();
1100                     buffer_copy_string_len(&sce->name, name, len);
1101                 }
1102             }
1103             sce->st = *st;
1104         }
1105         sce->stat_ts = log_epoch_secs;
1106     }
1107 }
1108 
1109 void stat_cache_delete_entry(const char *name, uint32_t len)
1110 {
1111     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1112     force_assert(0 != len);
1113     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1114     splay_tree **sptree = &sc.files;
1115     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1116     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1117         stat_cache_entry_free(sce);
1118         *sptree = splaytree_delete(*sptree, (*sptree)->key);
1119     }
1120 }
1121 
1122 void stat_cache_invalidate_entry(const char *name, uint32_t len)
1123 {
1124     splay_tree **sptree = &sc.files;
1125     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1126     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1127         sce->stat_ts = 0;
1128       #ifdef HAVE_FAM_H
1129         if (sce->fam_dir != NULL) {
1130             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1131             sce->fam_dir = NULL;
1132         }
1133       #endif
1134     }
1135 }
1136 
1137 #ifdef HAVE_FAM_H
1138 
1139 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
1140                                                 const char *name, size_t len)
1141 {
1142     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
1143     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
1144 
1145     buffer *b = &((stat_cache_entry *)t->data)->name;
1146     size_t blen = buffer_string_length(b);
1147     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
1148         stat_cache_entry *sce = t->data;
1149         sce->stat_ts = 0;
1150         if (sce->fam_dir != NULL) {
1151             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1152             sce->fam_dir = NULL;
1153         }
1154     }
1155 }
1156 
1157 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
1158 {
1159     splay_tree * const sptree = sc.files;
1160     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
1161 }
1162 
1163 #endif
1164 
/*
 * walk through the splay_tree and collect the keys of entries located
 * below the given directory; the tagged entries are removed in a second loop
 */
1169 
1170 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
1171                                     int *keys, int *ndx)
1172 {
1173     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1174     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
1175     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
1176     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1177 
1178     buffer *b = &((stat_cache_entry *)t->data)->name;
1179     size_t blen = buffer_string_length(b);
1180     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
1181         keys[(*ndx)++] = t->key;
1182 }
1183 
1184 __attribute_noinline__
1185 static void stat_cache_prune_dir_tree(const char *name, size_t len)
1186 {
1187     splay_tree *sptree = sc.files;
1188     int max_ndx, i;
1189     int keys[8192]; /* 32k size on stack */
1190     do {
1191         if (!sptree) break;
1192         max_ndx = 0;
1193         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
1194         for (i = 0; i < max_ndx; ++i) {
1195             const int ndx = keys[i];
1196             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
1197             if (node && node->key == ndx) {
1198                 stat_cache_entry_free(node->data);
1199                 sptree = splaytree_delete(sptree, ndx);
1200             }
1201         }
1202     } while (max_ndx == sizeof(keys)/sizeof(int));
1203     sc.files = sptree;
1204 }
1205 
/* remove the cache entry for path 'name' itself,
 * then all cache entries located below it */
static void stat_cache_delete_tree(const char *name, uint32_t len)
{
    stat_cache_delete_entry(name, len);   /* the path itself */
    stat_cache_prune_dir_tree(name, len); /* everything under "name/" */
}
1211 
/* remove directory 'name' and everything below it from the stat cache;
 * with the FAM engine, additionally invalidate the matching monitored
 * directories and run the FAM dir cleanup
 * (a trailing '/' is dropped from 'name' unless the path is "/") */
void stat_cache_delete_dir(const char *name, uint32_t len)
{
    force_assert(0 != len);
    if ('/' == name[len-1] && 0 == --len) len = 1;

    stat_cache_delete_tree(name, len);

  #ifdef HAVE_FAM_H
    if (STAT_CACHE_ENGINE_FAM != sc.stat_cache_engine) return;

    splay_tree ** const dirs = &sc.scf->dirs;
    fam_dir_entry * const fam_dir = stat_cache_sptree_find(dirs, name, len);
    if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
        fam_dir_invalidate_node(fam_dir);
    if (*dirs) fam_dir_invalidate_tree(*dirs, name, len);
    fam_dir_periodic_cleanup();
  #endif
}
1228 
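/***
 * look up (and if needed stat() and cache) the entry for an absolute path
 *
 * returns:
 *  - pointer to the cache entry on success
 *  - NULL on failure -> see errno for problem
 *    (EINVAL if path does not begin with '/', ENOTDIR if path ends in '/'
 *     but is not a directory, or errno from failed stat())
 */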
1237 
1238 stat_cache_entry * stat_cache_get_entry(const buffer *name) {
1239 	stat_cache_entry *sce = NULL;
1240 	struct stat st;
1241 	int file_ndx;
1242 
1243 	/* consistency: ensure lookup name does not end in '/' unless root "/"
1244 	 * (but use full path given with stat(), even with trailing '/') */
1245 	int final_slash = 0;
1246 	size_t len = buffer_string_length(name);
1247 	force_assert(0 != len);
1248 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
1249 	/* Note: paths are expected to be normalized before calling stat_cache,
1250 	 * e.g. without repeated '/' */
1251 
1252 	if (name->ptr[0] != '/') {
1253 		errno = EINVAL;
1254 		return NULL;
1255 	}
1256 
1257 	/*
1258 	 * check if the directory for this file has changed
1259 	 */
1260 
1261 	const time_t cur_ts = log_epoch_secs;
1262 
1263 	file_ndx = splaytree_djbhash(name->ptr, len);
1264 	splay_tree *sptree = sc.files = splaytree_splay(sc.files, file_ndx);
1265 
1266 	if (sptree && (sptree->key == file_ndx)) {
1267 		/* we have seen this file already and
1268 		 * don't stat() it again in the same second */
1269 
1270 		sce = sptree->data;
1271 
1272 		/* check if the name is the same, we might have a collision */
1273 
1274 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
1275 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
1276 				if (sce->stat_ts == cur_ts) {
1277 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1278 						errno = ENOTDIR;
1279 						return NULL;
1280 					}
1281 					return sce;
1282 				}
1283 			}
1284 		      #ifdef HAVE_FAM_H
1285 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
1286 				 && sce->fam_dir) { /* entry is in monitored dir */
1287 				/* re-stat() periodically, even if monitoring for changes
1288 				 * (due to limitations in stat_cache.c use of FAM)
1289 				 * (gaps due to not continually monitoring an entire tree) */
1290 				if (cur_ts - sce->stat_ts < 16) {
1291 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1292 						errno = ENOTDIR;
1293 						return NULL;
1294 					}
1295 					return sce;
1296 				}
1297 			}
1298 		      #endif
1299 		} else {
1300 			/* collision, forget about the entry */
1301 			sce = NULL;
1302 		}
1303 	}
1304 
1305 	if (-1 == stat(name->ptr, &st)) {
1306 		return NULL;
1307 	}
1308 
1309 	if (S_ISREG(st.st_mode)) {
1310 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1311 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
1312 			errno = ENOTDIR;
1313 			return NULL;
1314 		}
1315 	}
1316 
1317 	if (NULL == sce) {
1318 
1319 		sce = stat_cache_entry_init();
1320 		buffer_copy_string_len(&sce->name, name->ptr, len);
1321 
1322 		/* already splayed file_ndx */
1323 		if (NULL != sptree && sptree->key == file_ndx) {
1324 			/* hash collision: replace old entry */
1325 			stat_cache_entry_free(sptree->data);
1326 			sptree->data = sce;
1327 		} else {
1328 			sptree = sc.files = splaytree_insert(sptree, file_ndx, sce);
1329 		}
1330 
1331 	} else {
1332 
1333 		buffer_clear(&sce->etag);
1334 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1335 		buffer_clear(&sce->content_type);
1336 	      #endif
1337 
1338 	}
1339 
1340 	if (sce->fd >= 0) {
1341 		/* close fd if file changed */
1342 		if (!stat_cache_stat_eq(&sce->st, &st)) {
1343 			if (1 == sce->refcnt) {
1344 				close(sce->fd);
1345 				sce->fd = -1;
1346 			}
1347 			else {
1348 				--sce->refcnt; /* stat_cache_entry_free(sce); */
1349 				sptree->data = sce = stat_cache_entry_init();
1350 				buffer_copy_string_len(&sce->name, name->ptr, len);
1351 			}
1352 		}
1353 	}
1354 
1355 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1356 
1357 #ifdef HAVE_FAM_H
1358 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1359 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1360 		sce->fam_dir =
1361 		  fam_dir_monitor(sc.scf, CONST_BUF_LEN(name), &st);
1362 	      #if 0 /*(performed below)*/
1363 		if (NULL != sce->fam_dir) {
1364 			/*(may have been invalidated by dir change)*/
1365 			sce->stat_ts = cur_ts;
1366 		}
1367 	      #endif
1368 	}
1369 #endif
1370 
1371 	sce->stat_ts = cur_ts;
1372 	return sce;
1373 }
1374 
1375 stat_cache_entry * stat_cache_get_entry_open(const buffer * const name, const int symlinks) {
1376     stat_cache_entry * const sce = stat_cache_get_entry(name);
1377     if (NULL == sce) return NULL;
1378     if (sce->fd >= 0) return sce;
1379     if (sce->st.st_size > 0)
1380         sce->fd = stat_cache_open_rdonly_fstat(name, &sce->st, symlinks);
1381     return sce; /* (note: sce->fd might still be -1 if open() failed) */
1382 }
1383 
1384 const stat_cache_st * stat_cache_path_stat (const buffer * const name) {
1385     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1386     return sce ? &sce->st : NULL;
1387 }
1388 
1389 int stat_cache_path_isdir(const buffer *name) {
1390     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1391     return (sce && (S_ISDIR(sce->st.st_mode) ? 1 : (errno = ENOTDIR, 0)));
1392 }
1393 
1394 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1395     /* caller should check for symlinks only if we should block symlinks. */
1396 
1397     /* catch the obvious symlinks
1398      *
1399      * this is not a secure check as we still have a race-condition between
1400      * the stat() and the open. We can only solve this by
1401      * 1. open() the file
1402      * 2. fstat() the fd
1403      *
1404      * and keeping the file open for the rest of the time. But this can
1405      * only be done at network level.
1406      * */
1407 
1408   #ifdef HAVE_LSTAT
1409     /* we assume "/" can not be symlink,
1410      * so skip the symlink stuff if path is "/" */
1411     size_t len = buffer_string_length(name);
1412     force_assert(0 != len);
1413     force_assert(name->ptr[0] == '/');
1414     if (1 == len) return 0;
1415    #ifndef PATH_MAX
1416    #define PATH_MAX 4096
1417    #endif
1418     if (len >= PATH_MAX) return -1;
1419 
1420     char buf[PATH_MAX];
1421     memcpy(buf, name->ptr, len);
1422     char *s_cur = buf+len;
1423     do {
1424         *s_cur = '\0';
1425         struct stat st;
1426         if (0 == lstat(buf, &st)) {
1427             if (S_ISLNK(st.st_mode)) return 1;
1428         }
1429         else {
1430             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1431             return -1;
1432         }
1433     } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1434   #endif
1435 
1436     return 0;
1437 }
1438 
1439 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1440 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1441 	 * not any intermediate symlinks along the path)*/
1442 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1443 	if (fd >= 0) {
1444 		if (0 == fstat(fd, st)) {
1445 			return fd;
1446 		} else {
1447 			const int errnum = errno;
1448 			close(fd);
1449 			errno = errnum;
1450 		}
1451 	}
1452 	return -1;
1453 }
1454 
/**
 * remove entries from the cache which have not been stat()ed for
 * more than max_age seconds
 *
 *
 * walk through the stat-cache, collect the keys of entries which are
 * too old, and remove them in a second loop
 */
1463 
1464 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const time_t cur_ts) {
1465     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1466     if (t->left)
1467         stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1468     if (t->right)
1469         stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1470     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1471 
1472     const stat_cache_entry * const sce = t->data;
1473     if (cur_ts - sce->stat_ts > max_age)
1474         keys[(*ndx)++] = t->key;
1475 }
1476 
1477 static void stat_cache_periodic_cleanup(const time_t max_age, const time_t cur_ts) {
1478     splay_tree *sptree = sc.files;
1479     int max_ndx, i;
1480     int keys[8192]; /* 32k size on stack */
1481     do {
1482         if (!sptree) break;
1483         max_ndx = 0;
1484         stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1485         for (i = 0; i < max_ndx; ++i) {
1486             int ndx = keys[i];
1487             sptree = splaytree_splay(sptree, ndx);
1488             if (sptree && sptree->key == ndx) {
1489                 stat_cache_entry_free(sptree->data);
1490                 sptree = splaytree_delete(sptree, ndx);
1491             }
1492         }
1493     } while (max_ndx == sizeof(keys)/sizeof(int));
1494     sc.files = sptree;
1495 }
1496 
1497 void stat_cache_trigger_cleanup(void) {
1498 	time_t max_age = 2;
1499 
1500       #ifdef HAVE_FAM_H
1501 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1502 		if (log_epoch_secs & 0x1F) return;
1503 		/* once every 32 seconds (0x1F == 31) */
1504 		max_age = 32;
1505 		fam_dir_periodic_cleanup();
1506 		/* By doing this before stat_cache_periodic_cleanup(),
1507 		 * entries used within the next max_age secs will remain
1508 		 * monitored, instead of effectively flushing and
1509 		 * rebuilding the FAM monitoring every max_age seconds */
1510 	}
1511       #endif
1512 
1513 	stat_cache_periodic_cleanup(max_age, log_epoch_secs);
1514 }
1515