xref: /lighttpd1.4/src/stat_cache.c (revision ed2c6983)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "http_etag.h"
7 #include "algo_splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
41 enum {
42   STAT_CACHE_ENGINE_SIMPLE  = 0  /*(default)*/
43  ,STAT_CACHE_ENGINE_NONE    = 1
44  ,STAT_CACHE_ENGINE_FAM     = 2  /* same as STAT_CACHE_ENGINE_INOTIFY */
45  ,STAT_CACHE_ENGINE_INOTIFY = 2  /* same as STAT_CACHE_ENGINE_FAM */
46  ,STAT_CACHE_ENGINE_KQUEUE  = 2  /* same as STAT_CACHE_ENGINE_FAM */
47 };
48 
49 struct stat_cache_fam;  /* declaration */
50 
51 typedef struct stat_cache {
52 	int stat_cache_engine;
53 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
54 	struct stat_cache_fam *scf;
55 } stat_cache;
56 
57 static stat_cache sc;
58 
59 
60 static void * stat_cache_sptree_find(splay_tree ** const sptree,
61                                      const char * const name,
62                                      uint32_t len)
63 {
64     const int ndx = splaytree_djbhash(name, len);
65     *sptree = splaytree_splay(*sptree, ndx);
66     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
67 }
68 
69 
70 #if defined(HAVE_SYS_INOTIFY_H) \
71  || (defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
72 #ifndef HAVE_FAM_H
73 #define HAVE_FAM_H
74 #endif
75 #endif
76 
77 #ifdef HAVE_FAM_H
78 
79 /* monitor changes in directories using FAM
80  *
81  * This implementation employing FAM monitors directories as they are used,
82  * and maintains a reference count for cache use within stat_cache.c.
83  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
84  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
85  * within the cache are checked against the filesystem upon use if last stat()
86  * was greater than or equal to 16 seconds ago.
87  *
88  * This implementation does not monitor every directory in a tree, and therefore
89  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
90  * processing events from FAM might also lead to stale cache entries.
91  *
92  * For many websites, a large number of files are seldom, if ever, modified,
93  * and a common practice with images is to create a new file with a new name
94  * when a new version is needed, in order for client browsers and CDNs to better
95  * cache the content.  Given this, most use will see little difference in
96  * performance between server.stat-cache-engine = "fam" and "simple" (default).
97  * The default server.stat-cache-engine = "simple" calls stat() on a target once
98  * per second, and reuses that information until the next second.  For use where
99  * changes must be immediately visible, server.stat-cache-engine = "disable"
100  * should be used.
101  *
102  * When considering use of server.stat-cache-engine = "fam", there are a few
103  * additional limitations for this cache implementation using FAM.
104  * - symlinks to files located outside of the current directory do not result
105  *   in changes to that file being monitored (unless that file is in a directory
106  *   which is monitored as a result of a different request).  symlinks can be
107  *   chained and can be circular.  This implementation *does not* readlink() or
108  *   realpath() to resolve the chains to find and monitor the ultimate target
109  *   directory.  While symlinks to files located outside the current directory
110  *   are not monitored, symlinks to directories *are* monitored, though chains
111  *   of symlinks to directories do not result in monitoring of the directories
112  *   containing intermediate symlinks to the target directory.
113  * - directory rename of a directory which is not currently being monitored will
114  *   result in stale information in the cache if there is a subdirectory that is
115  *   being monitored.
116  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
117  * does re-validate the information in the cache upon use if the cache entry has
118  * not been checked in 16 seconds, so that is the upper limit for use of stale
119  * data.
120  *
121  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
122  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
123  * to the overhead of processing the additional noise generated from changes.
124  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
125  * untrusted files where a malicious user could generate an excess of change
126  * events.
127  *
128  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
129  * event is received for a directory (or symlink to a directory) which has been
130  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
131  * changes of large directories trees where there have been a large number of
132  * different files recently accessed and part of the stat_cache.
133  */
134 
135 #if defined(HAVE_SYS_INOTIFY_H) \
136  && !(defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
137 
138 #include <sys/inotify.h>
139 
140 /*(translate FAM API to inotify; this is specific to stat_cache.c use of FAM)*/
141 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
142 typedef int FAMRequest; /*(fr)*/
143 #define FAMClose(fd) \
144         close(*(fd))
145 #define FAMCancelMonitor(fd, wd) \
146         inotify_rm_watch(*(fd), *(wd))
147 #define fam_watch_mask ( IN_ATTRIB | IN_CREATE | IN_DELETE | IN_DELETE_SELF \
148                        | IN_MODIFY | IN_MOVE_SELF | IN_MOVED_FROM \
149                        | IN_EXCL_UNLINK | IN_ONLYDIR )
150                      /*(note: follows symlinks; not providing IN_DONT_FOLLOW)*/
151 #define FAMMonitorDirectory(fd, fn, wd, userData) \
152         ((*(wd) = inotify_add_watch(*(fd), (fn), (fam_watch_mask))) < 0)
153 typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
154     FAMChanged=1,
155     FAMDeleted=2,
156     FAMCreated=5,
157     FAMMoved=6,
158 } FAMCodes;
159 
160 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
161 #undef HAVE_SYS_INOTIFY_H
162 
163 #include <sys/event.h>
164 #include <sys/time.h>
165 
166 /*(translate FAM API to inotify; this is specific to stat_cache.c use of FAM)*/
167 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
168 typedef int FAMRequest; /*(fr)*/
169 #define FAMClose(fd) \
170         (-1 != (*(fd)) ? close(*(fd)) : 0)
171 static int FAMCancelMonitor (const int * const fd, int * const wd)
172 {
173     if (-1 == *fd) return 0;
174     if (-1 == *wd) return 0;
175     struct timespec t0 = { 0, 0 };
176     struct kevent kev;
177     EV_SET(&kev, *wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
178     int rc = kevent(*fd, &kev, 1, NULL, 0, &t0);
179     close(*wd);
180     *wd = -1;
181     return rc;
182 }
183 static int FAMMonitorDirectory (int * const fd, char * const fn, int * const wd, void * const userData)
184 {
185     *wd = fdevent_open_dirname(fn, 1); /*(note: follows symlinks)*/
186     if (-1 == *wd) return -1;
187     struct timespec t0 = { 0, 0 };
188     struct kevent kev;
189     unsigned short kev_flags = EV_ADD | EV_ENABLE | EV_CLEAR;
190     unsigned int kev_fflags = NOTE_ATTRIB | NOTE_EXTEND | NOTE_LINK | NOTE_WRITE
191                             | NOTE_DELETE | NOTE_REVOKE | NOTE_RENAME;
192     EV_SET(&kev, *wd, EVFILT_VNODE, kev_flags, kev_fflags, 0, userData);
193     return kevent(*fd, &kev, 1, NULL, 0, &t0);
194 }
195 typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
196     FAMChanged=1,
197     FAMDeleted=2,
198     FAMCreated=5,
199     FAMMoved=6,
200 } FAMCodes;
201 
202 #else
203 
204 #include <fam.h>
205 
206 #ifdef HAVE_FAMNOEXISTS
207 #ifndef LIGHTTPD_STATIC
208 #ifdef HAVE_DLFCN_H
209 #include <dlfcn.h>
210 #endif
211 #endif
212 #endif
213 
214 #endif
215 
216 typedef struct fam_dir_entry {
217 	buffer name;
218 	int refcnt;
219 	FAMRequest req;
220 	unix_time64_t stat_ts;
221 	dev_t st_dev;
222 	ino_t st_ino;
223 	struct fam_dir_entry *fam_parent;
224 } fam_dir_entry;
225 
226 typedef struct stat_cache_fam {
227 	splay_tree *dirs; /* indexed by path; node data is fam_dir_entry */
228   #ifdef HAVE_SYS_INOTIFY_H
229 	splay_tree *wds;  /* indexed by inotify watch descriptor */
230   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
231   #else
232 	FAMConnection fam;
233   #endif
234 	log_error_st *errh;
235 	fdevents *ev;
236 	fdnode *fdn;
237 	int fd;
238 } stat_cache_fam;
239 
240 __attribute_returns_nonnull__
241 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
242 {
243     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
244     force_assert(NULL != fam_dir);
245 
246     buffer_copy_string_len(&fam_dir->name, name, len);
247     fam_dir->refcnt = 0;
248   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
249     fam_dir->req = -1;
250   #endif
251 
252     return fam_dir;
253 }
254 
255 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
256 {
257     if (!fam_dir) return;
258     /*(fam_dir->fam_parent might be invalid pointer here; ignore)*/
259     free(fam_dir->name.ptr);
260   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
261     if (-1 != fam_dir->req)
262         close(fam_dir->req);
263   #endif
264     free(fam_dir);
265 }
266 
267 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
268 {
269     fam_dir->stat_ts = 0;
270     if (fam_dir->fam_parent) {
271         --fam_dir->fam_parent->refcnt;
272         fam_dir->fam_parent = NULL;
273     }
274 }
275 
276 /*
277  * walk though splay_tree and collect contents of dir tree.
278  * remove tagged entries in a second loop
279  */
280 
281 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
282 {
283     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
284     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
285     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
286     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
287 
288     fam_dir_entry * const fam_dir = t->data;
289     if (0 == fam_dir->refcnt) {
290         fam_dir_invalidate_node(fam_dir);
291         keys[(*ndx)++] = t->key;
292     }
293 }
294 
295 __attribute_noinline__
296 static void fam_dir_periodic_cleanup() {
297     stat_cache_fam * const scf = sc.scf;
298     int max_ndx, i;
299     int keys[512]; /* 2k size on stack */
300   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
301     struct kevent kevl[512]; /* 32k size on stack to batch kevent EV_DELETE */
302   #endif
303     do {
304         if (!scf->dirs) break;
305         max_ndx = 0;
306         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
307         for (i = 0; i < max_ndx; ++i) {
308             const int ndx = keys[i];
309             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
310             if (node && node->key == ndx) {
311                 fam_dir_entry *fam_dir = node->data;
312                 scf->dirs = splaytree_delete(scf->dirs, ndx);
313               #ifdef HAVE_SYS_INOTIFY_H
314                 scf->wds = splaytree_delete(scf->wds, fam_dir->req);
315               #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
316                 /* batch process kevent removal; defer cancel */
317                 EV_SET(kevl+i, fam_dir->req, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
318                 fam_dir->req = -1; /*(make FAMCancelMonitor() a no-op)*/
319               #endif
320                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
321                 fam_dir_entry_free(fam_dir);
322             }
323         }
324       #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
325         /* batch process: kevent() to submit EV_DELETE, then close dir fds */
326         if (0 == max_ndx) break;
327         struct timespec t0 = { 0, 0 };
328         kevent(scf->fd, kevl, max_ndx, NULL, 0, &t0);
329         for (i = 0; i < max_ndx; ++i)
330             close((int)kevl[i].ident);
331       #endif
332     } while (max_ndx == sizeof(keys)/sizeof(int));
333 }
334 
335 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
336 {
337   #ifdef __clang_analyzer__
338     force_assert(name);
339   #endif
340     /*force_assert(t);*/
341     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
342     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
343 
344     fam_dir_entry * const fam_dir = t->data;
345   #ifdef __clang_analyzer__
346     force_assert(fam_dir);
347   #endif
348     const buffer * const b = &fam_dir->name;
349     size_t blen = buffer_clen(b);
350     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
351         fam_dir_invalidate_node(fam_dir);
352 }
353 
354 /* declarations */
355 static void stat_cache_delete_tree(const char *name, uint32_t len);
356 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
357 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry * const fam_dir, const char * const fn, const uint32_t fnlen, int code);
358 
359 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
360 {
361   #ifdef HAVE_SYS_INOTIFY_H
362     /*(inotify pads in->len to align struct following in->name[])*/
363     char buf[4096]
364       __attribute__ ((__aligned__(__alignof__(struct inotify_event))));
365     int rd;
366     do {
367         rd = (int)read(scf->fd, buf, sizeof(buf));
368         if (rd <= 0) {
369             if (-1 == rd && errno != EINTR && errno != EAGAIN) {
370                 log_perror(scf->errh, __FILE__, __LINE__, "inotify error");
371                 /* TODO: could flush cache, close scf->fd, and re-open inotify*/
372             }
373             break;
374         }
375         for (int i = 0; i < rd; ) {
376             struct inotify_event * const in =
377               (struct inotify_event *)((uintptr_t)buf + i);
378             uint32_t len = in->len;
379             if (len > sizeof(buf)) break; /*(should not happen)*/
380             i += sizeof(struct inotify_event) + len;
381             if (i > rd) break; /*(should not happen (partial record))*/
382             if (in->mask & IN_CREATE)
383                 continue; /*(see comment below for FAMCreated)*/
384             if (in->mask & IN_Q_OVERFLOW) {
385                 log_error(scf->errh, __FILE__, __LINE__,
386                           "inotify queue overflow");
387                 continue;
388             }
389             /* ignore events which may have been pending for
390              * paths recently cancelled via FAMCancelMonitor() */
391             scf->wds = splaytree_splay(scf->wds, in->wd);
392             if (!scf->wds || scf->wds->key != in->wd)
393                 continue;
394             fam_dir_entry *fam_dir = scf->wds->data;
395             if (NULL == fam_dir)        /*(should not happen)*/
396                 continue;
397             if (fam_dir->req != in->wd) /*(should not happen)*/
398                 continue;
399             /*(specific to use here in stat_cache.c)*/
400             int code = 0;
401             if (in->mask & (IN_ATTRIB | IN_MODIFY))
402                 code = FAMChanged;
403             else if (in->mask & (IN_DELETE | IN_DELETE_SELF | IN_UNMOUNT))
404                 code = FAMDeleted;
405             else if (in->mask & (IN_MOVE_SELF | IN_MOVED_FROM))
406                 code = FAMMoved;
407 
408             if (len) {
409                 do { --len; } while (len && in->name[len-1] == '\0');
410             }
411             stat_cache_handle_fdevent_fn(scf, fam_dir, in->name, len, code);
412         }
413     } while (rd + sizeof(struct inotify_event) + NAME_MAX + 1 > sizeof(buf));
414   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
415     struct kevent kevl[256];
416     struct timespec t0 = { 0, 0 };
417     int n;
418     do {
419         n = kevent(scf->fd, NULL, 0, kevl, sizeof(kevl)/sizeof(*kevl), &t0);
420         if (n <= 0) break;
421         for (int i = 0; i < n; ++i) {
422             const struct kevent * const kev = kevl+i;
423             /* ignore events which may have been pending for
424              * paths recently cancelled via FAMCancelMonitor() */
425             int ndx = (int)(intptr_t)kev->udata;
426             scf->dirs = splaytree_splay(scf->dirs, ndx);
427             if (!scf->dirs || scf->dirs->key != ndx)
428                 continue;
429             fam_dir_entry *fam_dir = scf->dirs->data;
430             if (fam_dir->req != (int)kev->ident)
431                 continue;
432             /*(specific to use here in stat_cache.c)*/
433             /* note: stat_cache only monitors on directories,
434              *       so events here are only on directories
435              * note: changes are treated as FAMDeleted since
436              *       it is unknown which file in dir was changed
437              *       This is not efficient, but this stat_cache mechanism also
438              *       should not be used on frequently modified directories. */
439             int code = 0;
440             if (kev->fflags & (NOTE_WRITE|NOTE_ATTRIB|NOTE_EXTEND|NOTE_LINK))
441                 code = FAMDeleted; /*(not FAMChanged; see comment above)*/
442             else if (kev->fflags & (NOTE_DELETE|NOTE_REVOKE))
443                 code = FAMDeleted;
444             else if (kev->fflags & NOTE_RENAME)
445                 code = FAMMoved;
446             if (kev->flags & EV_ERROR) /*(not expected; treat as FAMDeleted)*/
447                 code = FAMDeleted;
448             stat_cache_handle_fdevent_fn(scf, fam_dir, NULL, 0, code);
449         }
450     } while (n == sizeof(kevl)/sizeof(*kevl));
451   #else
452     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
453         FAMEvent fe;
454         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
455 
456         /* ignore events which may have been pending for
457          * paths recently cancelled via FAMCancelMonitor() */
458         ndx = (int)(intptr_t)fe.userdata;
459         scf->dirs = splaytree_splay(scf->dirs, ndx);
460         if (!scf->dirs || scf->dirs->key != ndx) {
461             continue;
462         }
463         fam_dir_entry *fam_dir = scf->dirs->data;
464         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
465             != FAMREQUEST_GETREQNUM(&fe.fr)) {
466             continue;
467         }
468 
469         uint32_t fnlen = (fe.code != FAMCreated && fe.filename[0] != '/')
470           ? (uint32_t)strlen(fe.filename)
471           : 0;
472         stat_cache_handle_fdevent_fn(scf, fam_dir, fe.filename, fnlen, fe.code);
473     }
474   #endif
475 }
476 
477 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry *fam_dir, const char * const fn, const uint32_t fnlen, int code)
478 {
479         if (fnlen) {
480             buffer * const n = &fam_dir->name;
481             fam_dir_entry *fam_link;
482             uint32_t len;
483             switch (code) {
484             case FAMCreated:
485                 /* file created in monitored dir modifies dir and
486                  * we should get a separate FAMChanged event for dir.
487                  * Therefore, ignore file FAMCreated event here.
488                  * Also, if FAMNoExists() is used, might get spurious
489                  * FAMCreated events as changes are made e.g. in monitored
490                  * sub-sub-sub dirs and the library discovers new (already
491                  * existing) dir entries */
492                 return;
493             case FAMChanged:
494                 /* file changed in monitored dir does not modify dir */
495             case FAMDeleted:
496             case FAMMoved:
497                 /* file deleted or moved in monitored dir modifies dir,
498                  * but FAM provides separate notification for that */
499 
500                 /* temporarily append filename to dir in fam_dir->name to
501                  * construct path, then delete stat_cache entry (if any)*/
502                 len = buffer_clen(n);
503                 buffer_append_path_len(n, fn, fnlen);
504                 /* (alternatively, could chose to stat() and update)*/
505                 stat_cache_invalidate_entry(BUF_PTR_LEN(n));
506 
507                 fam_link = /*(check if might be symlink to monitored dir)*/
508                 stat_cache_sptree_find(&scf->dirs, BUF_PTR_LEN(n));
509                 if (fam_link && !buffer_is_equal(&fam_link->name, n))
510                     fam_link = NULL;
511 
512                 buffer_truncate(n, len);
513 
514                 if (fam_link) {
515                     /* replaced symlink changes containing dir */
516                     stat_cache_invalidate_entry(n->ptr, len);
517                     /* handle symlink to dir as deleted dir below */
518                     code = FAMDeleted;
519                     fam_dir = fam_link;
520                     break;
521                 }
522                 return;
523             default:
524                 return;
525             }
526         }
527 
528         switch(code) {
529         case FAMChanged:
530             stat_cache_invalidate_entry(BUF_PTR_LEN(&fam_dir->name));
531             break;
532         case FAMDeleted:
533         case FAMMoved:
534             stat_cache_delete_tree(BUF_PTR_LEN(&fam_dir->name));
535             fam_dir_invalidate_node(fam_dir);
536             if (scf->dirs)
537                 fam_dir_invalidate_tree(scf->dirs,
538                                         BUF_PTR_LEN(&fam_dir->name));
539             fam_dir_periodic_cleanup();
540             break;
541         default:
542             break;
543         }
544 }
545 
546 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
547 {
548 	stat_cache_fam * const scf = ctx; /* sc.scf */
549 
550 	if (revent & FDEVENT_IN) {
551 		stat_cache_handle_fdevent_in(scf);
552 	}
553 
554 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
555 		/* fam closed the connection */
556 		log_error(scf->errh, __FILE__, __LINE__,
557 		  "FAM connection closed; disabling stat_cache.");
558 		/* (although effectively STAT_CACHE_ENGINE_NONE,
559 		 *  do not change here so that periodic jobs clean up memory)*/
560 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
561 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
562 		fdevent_unregister(scf->ev, scf->fd);
563 		scf->fdn = NULL;
564 
565 		FAMClose(&scf->fam);
566 		scf->fd = -1;
567 	}
568 
569 	return HANDLER_GO_ON;
570 }
571 
572 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
573 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
574 	force_assert(scf);
575 	scf->fd = -1;
576 	scf->ev = ev;
577 	scf->errh = errh;
578 
579   #ifdef HAVE_SYS_INOTIFY_H
580 	scf->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
581 	if (scf->fd < 0) {
582 		log_perror(errh, __FILE__, __LINE__, "inotify_init1()");
583 		free(scf);
584 		return NULL;
585 	}
586   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
587    #ifdef __NetBSD__
588 	scf->fd = kqueue1(O_NONBLOCK|O_CLOEXEC|O_NOSIGPIPE);
589    #else
590 	scf->fd = kqueue();
591 	if (scf->fd >= 0) fdevent_setfd_cloexec(scf->fd);
592    #endif
593 	if (scf->fd < 0) {
594 		log_perror(errh, __FILE__, __LINE__, "kqueue()");
595 		free(scf);
596 		return NULL;
597 	}
598   #else
599 	/* setup FAM */
600 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
601 		log_error(errh, __FILE__, __LINE__,
602 		  "could not open a fam connection, dying.");
603 		free(scf);
604 		return NULL;
605 	}
606       #ifdef HAVE_FAMNOEXISTS
607       #ifdef LIGHTTPD_STATIC
608 	FAMNoExists(&scf->fam);
609       #else
610 	int (*FAMNoExists_fn)(FAMConnection *);
611 	FAMNoExists_fn =
612 	  (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
613 	if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
614       #endif
615       #endif
616 
617 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
618 	fdevent_setfd_cloexec(scf->fd);
619   #endif
620 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
621 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
622 
623 	return scf;
624 }
625 
626 static void stat_cache_free_fam(stat_cache_fam *scf) {
627 	if (NULL == scf) return;
628 
629       #ifdef HAVE_SYS_INOTIFY_H
630 	while (scf->wds) {
631 		splay_tree *node = scf->wds;
632 		scf->wds = splaytree_delete(scf->wds, node->key);
633 	}
634       #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
635 	/*(quicker cleanup to close kqueue() before cancel per entry)*/
636 	close(scf->fd);
637 	scf->fd = -1;
638       #endif
639 	while (scf->dirs) {
640 		/*(skip entry invalidation and FAMCancelMonitor())*/
641 		splay_tree *node = scf->dirs;
642 		fam_dir_entry_free((fam_dir_entry *)node->data);
643 		scf->dirs = splaytree_delete(scf->dirs, node->key);
644 	}
645 
646 	if (-1 != scf->fd) {
647 		/*scf->fdn already cleaned up in fdevent_free()*/
648 		FAMClose(&scf->fam);
649 		/*scf->fd = -1;*/
650 	}
651 
652 	free(scf);
653 }
654 
655 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
656 {
657     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
658     const int fn_is_dir = S_ISDIR(st->st_mode);
659     /*force_assert(0 != dirlen);*/
660     /*force_assert(fn[0] == '/');*/
661     /* consistency: ensure fn does not end in '/' unless root "/"
662      * FAM events will not end in '/', so easier to match this way */
663     if (fn[dirlen-1] == '/') --dirlen;
664     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
665     /* Note: paths are expected to be normalized before calling stat_cache,
666      * e.g. without repeated '/' */
667     if (!fn_is_dir) {
668         while (fn[--dirlen] != '/') ;
669         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
670     }
671     int dir_ndx = splaytree_djbhash(fn, dirlen);
672     fam_dir_entry *fam_dir = NULL;
673 
674     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
675     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
676         fam_dir = scf->dirs->data;
677         if (!buffer_eq_slen(&fam_dir->name, fn, dirlen)) {
678             /* hash collision; preserve existing
679              * do not monitor new to avoid cache thrashing */
680             return NULL;
681         }
682         /* directory already registered */
683     }
684 
685     const unix_time64_t cur_ts = log_monotonic_secs;
686     struct stat lst;
687     int ck_dir = fn_is_dir;
688     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
689         ck_dir = 1;
690         /*(temporarily modify fn)*/
691         fn[dirlen] = '\0';
692         if (0 != lstat(fn, &lst)) {
693             fn[dirlen] = '/';
694             return NULL;
695         }
696         if (!S_ISLNK(lst.st_mode)) {
697             st = &lst;
698         }
699         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
700             fn[dirlen] = '/';
701             return NULL;
702         }
703         fn[dirlen] = '/';
704     }
705 
706     int ck_lnk = (NULL == fam_dir);
707     if (ck_dir && NULL != fam_dir) {
708         /* check stat() matches device and inode, just in case an external event
709          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
710         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
711             ck_lnk = 1;
712             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
713              * fam_dir is not NULL and so splaytree_insert not called below)*/
714             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
715             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
716                 stat_cache_update_entry(fn, dirlen, st, NULL);
717             /*(must not delete tree since caller is holding a valid node)*/
718             stat_cache_invalidate_dir_tree(fn, dirlen);
719           #ifdef HAVE_SYS_INOTIFY_H
720             scf->wds = splaytree_delete(scf->wds, fam_dir->req);
721           #endif
722             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
723                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name.ptr,
724                                             &fam_dir->req,
725                                             (void *)(intptr_t)dir_ndx)) {
726                 fam_dir->stat_ts = 0; /* invalidate */
727                 return NULL;
728             }
729             fam_dir->st_dev = st->st_dev;
730             fam_dir->st_ino = st->st_ino;
731           #ifdef HAVE_SYS_INOTIFY_H
732             scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
733           #endif
734         }
735         fam_dir->stat_ts = cur_ts;
736     }
737 
738     if (NULL == fam_dir) {
739         fam_dir = fam_dir_entry_init(fn, dirlen);
740 
741         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name.ptr,&fam_dir->req,
742                                      (void *)(intptr_t)dir_ndx)) {
743           #if defined(HAVE_SYS_INOTIFY_H) \
744            || (defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE)
745             log_perror(scf->errh, __FILE__, __LINE__,
746               "monitoring dir failed: %s file: %s",
747               fam_dir->name.ptr, fn);
748           #else
749             log_error(scf->errh, __FILE__, __LINE__,
750               "monitoring dir failed: %s file: %s %s",
751               fam_dir->name.ptr, fn, FamErrlist[FAMErrno]);
752           #endif
753             fam_dir_entry_free(fam_dir);
754             return NULL;
755         }
756 
757         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
758       #ifdef HAVE_SYS_INOTIFY_H
759         scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
760       #endif
761         fam_dir->stat_ts= cur_ts;
762         fam_dir->st_dev = st->st_dev;
763         fam_dir->st_ino = st->st_ino;
764     }
765 
766     if (ck_lnk) {
767         if (fn_is_dir) {
768             /*(temporarily modify fn)*/
769             char e = fn[dirlen];
770             fn[dirlen] = '\0';
771             if (0 != lstat(fn, &lst)) {
772                 fn[dirlen] = e;
773                 return NULL;
774             }
775             fn[dirlen] = e;
776         }
777         if (fam_dir->fam_parent) {
778             --fam_dir->fam_parent->refcnt;
779             fam_dir->fam_parent = NULL;
780         }
781         if (S_ISLNK(lst.st_mode)) {
782             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
783         }
784     }
785 
786     ++fam_dir->refcnt;
787     return fam_dir;
788 }
789 
790 #endif
791 
792 
793 __attribute_malloc__
794 __attribute_returns_nonnull__
795 static stat_cache_entry * stat_cache_entry_init(void) {
796     stat_cache_entry *sce = calloc(1, sizeof(*sce));
797     force_assert(NULL != sce);
798     sce->fd = -1;
799     sce->refcnt = 1;
800     return sce;
801 }
802 
803 static void stat_cache_entry_free(void *data) {
804     stat_cache_entry *sce = data;
805     if (!sce) return;
806 
807     if (--sce->refcnt) return;
808 
809   #ifdef HAVE_FAM_H
810     /*(decrement refcnt only;
811      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
812     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
813   #endif
814 
815     free(sce->name.ptr);
816     free(sce->etag.ptr);
817     if (sce->content_type.size) free(sce->content_type.ptr);
818     if (sce->fd >= 0) close(sce->fd);
819 
820     free(sce);
821 }
822 
823 void stat_cache_entry_refchg(void *data, int mod) {
824     /*(expect mod == -1 or mod == 1)*/
825     stat_cache_entry * const sce = data;
826     if (mod < 0 && 1 == sce->refcnt)
827         stat_cache_entry_free(data);
828     else
829         sce->refcnt += mod;
830 }
831 
832 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
833 
834 static const char *attrname = "Content-Type";
835 static char attrval[128];
836 static buffer attrb = { attrval, 0, 0 };
837 
838 static int stat_cache_attr_get(const char *name) {
839   #if defined(HAVE_XATTR)
840    #if defined(HAVE_SYS_XATTR_H)
841     ssize_t attrlen;
842     if (0 < (attrlen = getxattr(name, attrname,
843                                 attrval, sizeof(attrval)-1)))
844    #else
845     int attrlen = sizeof(attrval)-1;
846     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
847    #endif
848   #elif defined(HAVE_EXTATTR)
849     ssize_t attrlen;
850     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
851                                         attrval, sizeof(attrval)-1)))
852   #endif
853     {
854         attrval[attrlen] = '\0';
855         attrb.used = (uint32_t)(attrlen + 1);
856         return 1;
857     }
858     return 0;
859 }
860 
861 #endif
862 
863 int stat_cache_init(fdevents *ev, log_error_st *errh) {
864   #ifdef HAVE_FAM_H
865     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
866         sc.scf = stat_cache_init_fam(ev, errh);
867         if (NULL == sc.scf) return 0;
868     }
869   #else
870     UNUSED(ev);
871     UNUSED(errh);
872   #endif
873 
874     return 1;
875 }
876 
877 void stat_cache_free(void) {
878     splay_tree *sptree = sc.files;
879     while (sptree) {
880         stat_cache_entry_free(sptree->data);
881         sptree = splaytree_delete(sptree, sptree->key);
882     }
883     sc.files = NULL;
884 
885   #ifdef HAVE_FAM_H
886     stat_cache_free_fam(sc.scf);
887     sc.scf = NULL;
888   #endif
889 
890   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
891     attrname = "Content-Type";
892   #endif
893 
894     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
895 }
896 
897 void stat_cache_xattrname (const char *name) {
898   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
899     attrname = name;
900   #else
901     UNUSED(name);
902   #endif
903 }
904 
905 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
906     if (buffer_is_blank(stat_cache_string))
907         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
908     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
909         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
910 #ifdef HAVE_SYS_INOTIFY_H
911     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("inotify")))
912         sc.stat_cache_engine = STAT_CACHE_ENGINE_INOTIFY;
913         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_INOTIFY)*/
914 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
915     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("kqueue")))
916         sc.stat_cache_engine = STAT_CACHE_ENGINE_KQUEUE;
917         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_KQUEUE)*/
918 #endif
919 #ifdef HAVE_FAM_H
920     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
921         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
922 #endif
923     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable"))
924              || buffer_eq_slen(stat_cache_string, CONST_STR_LEN("none")))
925         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
926     else {
927         log_error(errh, __FILE__, __LINE__,
928           "server.stat-cache-engine can be one of \"disable\", \"simple\","
929 #ifdef HAVE_SYS_INOTIFY_H
930           " \"inotify\","
931 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
932           " \"kqueue\","
933 #endif
934 #ifdef HAVE_FAM_H
935           " \"fam\","
936 #endif
937           " but not: %s", stat_cache_string->ptr);
938         return -1;
939     }
940     return 0;
941 }
942 
943 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
944 {
945     const char * const end = name + nlen; /*(end of string)*/
946     const uint32_t used = mimetypes->used;
947     if (used < 16) {
948         for (uint32_t i = 0; i < used; ++i) {
949             /* suffix match */
950             const data_string *ds = (data_string *)mimetypes->data[i];
951             const size_t klen = buffer_clen(&ds->key);
952             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
953                 return &ds->value;
954         }
955     }
956     else {
957         const char *s;
958         const data_string *ds;
959         if (nlen) {
960             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
961             if (*s == '/') ++s;
962         }
963         else {
964             s = name;
965         }
966         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
967         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
968         if (NULL != ds) return &ds->value;
969         while (++s < end) {
970             while (*s != '.' && ++s != end) ;
971             if (s == end) break;
972             /* search ".ext" then "ext" */
973             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
974             if (NULL != ds) return &ds->value;
975             /* repeat search without leading '.' to handle situation where
976              * admin configured mimetype.assign keys without leading '.' */
977             if (++s < end) {
978                 if (*s == '.') { --s; continue; }
979                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
980                 if (NULL != ds) return &ds->value;
981             }
982         }
983         /* search for ""; catchall */
984         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
985         if (NULL != ds) return &ds->value;
986     }
987 
988     return NULL;
989 }
990 
991 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
992 
993 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
994 {
995     return stat_cache_attr_get(name) ? &attrb : NULL;
996 }
997 
998 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
999 {
1000     /*(invalid caching if user config has multiple, different
1001      * r->conf.mimetypes for same extension (not expected))*/
1002     if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1003 
1004     if (!S_ISREG(sce->st.st_mode)) return NULL;
1005 
1006     /* cache mimetype */
1007     const buffer *mtype =
1008       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
1009     if (NULL == mtype)
1010         mtype = stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1011     if (NULL != mtype) {
1012         if (sce->content_type.size) {
1013             buffer_copy_buffer(&sce->content_type, mtype);
1014         }
1015         else if (mtype == &attrb) {
1016             sce->content_type.ptr = NULL;
1017             buffer_copy_buffer(&sce->content_type, mtype);
1018         }
1019         else {
1020             /*(copy pointers from mimetypes array; avoid allocation)*/
1021             sce->content_type.ptr = mtype->ptr;
1022             sce->content_type.used = mtype->used;
1023             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1024         }
1025     }
1026     else
1027         buffer_clear(&sce->content_type);
1028 
1029     return &sce->content_type;
1030 }
1031 
1032 #else
1033 
1034 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
1035 {
1036     /*(invalid caching if user config has multiple, different
1037      * r->conf.mimetypes for same extension (not expected))*/
1038     if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1039 
1040     if (!S_ISREG(sce->st.st_mode)) return NULL;
1041 
1042     /* cache mimetype */
1043     const buffer * const mtype =
1044       stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1045     if (NULL != mtype) {
1046         /*(copy pointers from mimetypes array; avoid allocation)*/
1047         sce->content_type.ptr = mtype->ptr;
1048         sce->content_type.used = mtype->used;
1049         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1050     }
1051     else
1052         buffer_clear(&sce->content_type);
1053 
1054     return &sce->content_type;
1055 }
1056 
1057 #endif
1058 
1059 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
1060     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
1061      * for same path (not expected, since etag flags should be by filesystem))*/
1062     if (!buffer_is_blank(&sce->etag)) return &sce->etag;
1063 
1064     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
1065         if (0 == flags) return NULL;
1066         http_etag_create(&sce->etag, &sce->st, flags);
1067         return &sce->etag;
1068     }
1069 
1070     return NULL;
1071 }
1072 
1073 __attribute_pure__
1074 static int stat_cache_stat_eq(const struct stat * const sta, const struct stat * const stb) {
1075     return
1076       #ifdef st_mtime /* use high-precision timestamp if available */
1077       #if defined(__APPLE__) && defined(__MACH__)
1078         sta->st_mtimespec.tv_nsec == stb->st_mtimespec.tv_nsec
1079       #else
1080         sta->st_mtim.tv_nsec == stb->st_mtim.tv_nsec
1081       #endif
1082       #else
1083         1
1084       #endif
1085         && sta->st_mtime == stb->st_mtime
1086         && sta->st_size  == stb->st_size
1087         && sta->st_ino   == stb->st_ino
1088         && sta->st_dev   == stb->st_dev;
1089 }
1090 
1091 void stat_cache_update_entry(const char *name, uint32_t len,
1092                              struct stat *st, buffer *etagb)
1093 {
1094     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1095     force_assert(0 != len);
1096     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1097     splay_tree **sptree = &sc.files;
1098     stat_cache_entry *sce =
1099       stat_cache_sptree_find(sptree, name, len);
1100     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1101         if (!stat_cache_stat_eq(&sce->st, st)) {
1102             /* etagb might be NULL to clear etag (invalidate) */
1103             buffer_clear(&sce->etag);
1104             if (etagb)
1105                 buffer_copy_string_len(&sce->etag, BUF_PTR_LEN(etagb));
1106           #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1107             buffer_clear(&sce->content_type);
1108           #endif
1109             if (sce->fd >= 0) {
1110                 if (1 == sce->refcnt) {
1111                     close(sce->fd);
1112                     sce->fd = -1;
1113                 }
1114                 else {
1115                     --sce->refcnt; /* stat_cache_entry_free(sce); */
1116                     (*sptree)->data = sce = stat_cache_entry_init();
1117                     buffer_copy_string_len(&sce->name, name, len);
1118                 }
1119             }
1120             sce->st = *st;
1121         }
1122         sce->stat_ts = log_monotonic_secs;
1123     }
1124 }
1125 
1126 void stat_cache_delete_entry(const char *name, uint32_t len)
1127 {
1128     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1129     force_assert(0 != len);
1130     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1131     splay_tree **sptree = &sc.files;
1132     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1133     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1134         stat_cache_entry_free(sce);
1135         *sptree = splaytree_delete(*sptree, (*sptree)->key);
1136     }
1137 }
1138 
1139 void stat_cache_invalidate_entry(const char *name, uint32_t len)
1140 {
1141     splay_tree **sptree = &sc.files;
1142     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1143     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1144         sce->stat_ts = 0;
1145       #ifdef HAVE_FAM_H
1146         if (sce->fam_dir != NULL) {
1147             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1148             sce->fam_dir = NULL;
1149         }
1150       #endif
1151     }
1152 }
1153 
1154 #ifdef HAVE_FAM_H
1155 
1156 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
1157                                                 const char *name, size_t len)
1158 {
1159     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
1160     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
1161 
1162     const buffer * const b = &((stat_cache_entry *)t->data)->name;
1163     const size_t blen = buffer_clen(b);
1164     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
1165         stat_cache_entry *sce = t->data;
1166         sce->stat_ts = 0;
1167         if (sce->fam_dir != NULL) {
1168             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1169             sce->fam_dir = NULL;
1170         }
1171     }
1172 }
1173 
1174 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
1175 {
1176     splay_tree * const sptree = sc.files;
1177     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
1178 }
1179 
1180 #endif
1181 
1182 /*
1183  * walk though splay_tree and collect contents of dir tree.
1184  * remove tagged entries in a second loop
1185  */
1186 
1187 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
1188                                     int *keys, int *ndx)
1189 {
1190     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1191     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
1192     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
1193     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1194 
1195     const buffer * const b = &((stat_cache_entry *)t->data)->name;
1196     const size_t blen = buffer_clen(b);
1197     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
1198         keys[(*ndx)++] = t->key;
1199 }
1200 
1201 __attribute_noinline__
1202 static void stat_cache_prune_dir_tree(const char *name, size_t len)
1203 {
1204     splay_tree *sptree = sc.files;
1205     int max_ndx, i;
1206     int keys[8192]; /* 32k size on stack */
1207     do {
1208         if (!sptree) break;
1209         max_ndx = 0;
1210         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
1211         for (i = 0; i < max_ndx; ++i) {
1212             const int ndx = keys[i];
1213             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
1214             if (node && node->key == ndx) {
1215                 stat_cache_entry_free(node->data);
1216                 sptree = splaytree_delete(sptree, ndx);
1217             }
1218         }
1219     } while (max_ndx == sizeof(keys)/sizeof(int));
1220     sc.files = sptree;
1221 }
1222 
1223 static void stat_cache_delete_tree(const char *name, uint32_t len)
1224 {
1225     stat_cache_delete_entry(name, len);
1226     stat_cache_prune_dir_tree(name, len);
1227 }
1228 
1229 void stat_cache_delete_dir(const char *name, uint32_t len)
1230 {
1231     force_assert(0 != len);
1232     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1233     stat_cache_delete_tree(name, len);
1234   #ifdef HAVE_FAM_H
1235     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1236         splay_tree **sptree = &sc.scf->dirs;
1237         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
1238         if (fam_dir && buffer_eq_slen(&fam_dir->name, name, len))
1239             fam_dir_invalidate_node(fam_dir);
1240         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
1241         fam_dir_periodic_cleanup();
1242     }
1243   #endif
1244 }
1245 
1246 /***
1247  *
1248  *
1249  *
1250  * returns:
1251  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
1252  *  - HANDLER_ERROR on stat() failed -> see errno for problem
1253  */
1254 
1255 stat_cache_entry * stat_cache_get_entry(const buffer * const name) {
1256 	stat_cache_entry *sce = NULL;
1257 
1258 	/* consistency: ensure lookup name does not end in '/' unless root "/"
1259 	 * (but use full path given with stat(), even with trailing '/') */
1260 	int final_slash = 0;
1261 	size_t len = buffer_clen(name);
1262 	force_assert(0 != len);
1263 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
1264 	/* Note: paths are expected to be normalized before calling stat_cache,
1265 	 * e.g. without repeated '/' */
1266 
1267 	if (name->ptr[0] != '/') {
1268 		errno = EINVAL;
1269 		return NULL;
1270 	}
1271 
1272 	/*
1273 	 * check if the directory for this file has changed
1274 	 */
1275 
1276 	const unix_time64_t cur_ts = log_monotonic_secs;
1277 
1278 	const int file_ndx = splaytree_djbhash(name->ptr, len);
1279 	splay_tree *sptree = sc.files = splaytree_splay(sc.files, file_ndx);
1280 
1281 	if (sptree && (sptree->key == file_ndx)) {
1282 		/* we have seen this file already and
1283 		 * don't stat() it again in the same second */
1284 
1285 		sce = sptree->data;
1286 
1287 		/* check if the name is the same, we might have a collision */
1288 
1289 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
1290 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
1291 				if (sce->stat_ts == cur_ts) {
1292 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1293 						errno = ENOTDIR;
1294 						return NULL;
1295 					}
1296 					return sce;
1297 				}
1298 			}
1299 		      #ifdef HAVE_FAM_H
1300 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
1301 				 && sce->fam_dir) { /* entry is in monitored dir */
1302 				/* re-stat() periodically, even if monitoring for changes
1303 				 * (due to limitations in stat_cache.c use of FAM)
1304 				 * (gaps due to not continually monitoring an entire tree) */
1305 				if (cur_ts - sce->stat_ts < 16) {
1306 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1307 						errno = ENOTDIR;
1308 						return NULL;
1309 					}
1310 					return sce;
1311 				}
1312 			}
1313 		      #endif
1314 		} else {
1315 			/* collision, forget about the entry */
1316 			sce = NULL;
1317 		}
1318 	}
1319 
1320 	struct stat st;
1321 	if (-1 == stat(name->ptr, &st)) {
1322 		return NULL;
1323 	}
1324 
1325 	if (NULL == sce) {
1326 
1327 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1328 		if (final_slash && S_ISREG(st.st_mode)) {
1329 			errno = ENOTDIR;
1330 			return NULL;
1331 		}
1332 
1333 		sce = stat_cache_entry_init();
1334 		buffer_copy_string_len(&sce->name, name->ptr, len);
1335 
1336 		/* already splayed file_ndx */
1337 		if (NULL != sptree && sptree->key == file_ndx) {
1338 			/* hash collision: replace old entry */
1339 			stat_cache_entry_free(sptree->data);
1340 			sptree->data = sce;
1341 		} else {
1342 			/*sptree =*/ sc.files = splaytree_insert(sptree, file_ndx, sce);
1343 		}
1344 
1345 	} else {
1346 
1347 		buffer_clear(&sce->etag);
1348 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1349 		buffer_clear(&sce->content_type);
1350 	      #endif
1351 
1352 		/* close fd if file changed */
1353 		if (sce->fd >= 0 && !stat_cache_stat_eq(&sce->st, &st)) {
1354 			if (1 == sce->refcnt) {
1355 				close(sce->fd);
1356 				sce->fd = -1;
1357 			}
1358 			else {
1359 				--sce->refcnt; /* stat_cache_entry_free(sce); */
1360 				sptree->data = sce = stat_cache_entry_init();
1361 				buffer_copy_string_len(&sce->name, name->ptr, len);
1362 			}
1363 		}
1364 	}
1365 
1366 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1367 
1368 #ifdef HAVE_FAM_H
1369 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1370 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1371 		sce->fam_dir =
1372 		  fam_dir_monitor(sc.scf, name->ptr, len, &st);
1373 	      #if 0 /*(performed below)*/
1374 		if (NULL != sce->fam_dir) {
1375 			/*(may have been invalidated by dir change)*/
1376 			sce->stat_ts = cur_ts;
1377 		}
1378 	      #endif
1379 	}
1380 #endif
1381 
1382 	sce->stat_ts = cur_ts;
1383 	return sce;
1384 }
1385 
1386 stat_cache_entry * stat_cache_get_entry_open(const buffer * const name, const int symlinks) {
1387     stat_cache_entry * const sce = stat_cache_get_entry(name);
1388     if (NULL == sce) return NULL;
1389     if (sce->fd >= 0) return sce;
1390     if (sce->st.st_size > 0) {
1391         sce->fd = stat_cache_open_rdonly_fstat(name, &sce->st, symlinks);
1392         buffer_clear(&sce->etag);
1393     }
1394     return sce; /* (note: sce->fd might still be -1 if open() failed) */
1395 }
1396 
1397 const stat_cache_st * stat_cache_path_stat (const buffer * const name) {
1398     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1399     return sce ? &sce->st : NULL;
1400 }
1401 
1402 int stat_cache_path_isdir(const buffer *name) {
1403     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1404     return (sce && (S_ISDIR(sce->st.st_mode) ? 1 : (errno = ENOTDIR, 0)));
1405 }
1406 
1407 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1408     /* caller should check for symlinks only if we should block symlinks. */
1409 
1410     /* catch the obvious symlinks
1411      *
1412      * this is not a secure check as we still have a race-condition between
1413      * the stat() and the open. We can only solve this by
1414      * 1. open() the file
1415      * 2. fstat() the fd
1416      *
1417      * and keeping the file open for the rest of the time. But this can
1418      * only be done at network level.
1419      * */
1420 
1421   #ifdef HAVE_LSTAT
1422     /* we assume "/" can not be symlink,
1423      * so skip the symlink stuff if path is "/" */
1424     size_t len = buffer_clen(name);
1425     force_assert(0 != len);
1426     force_assert(name->ptr[0] == '/');
1427     if (1 == len) return 0;
1428    #ifndef PATH_MAX
1429    #define PATH_MAX 4096
1430    #endif
1431     if (len >= PATH_MAX) return -1;
1432 
1433     char buf[PATH_MAX];
1434     memcpy(buf, name->ptr, len);
1435     char *s_cur = buf+len;
1436     do {
1437         *s_cur = '\0';
1438         struct stat st;
1439         if (0 == lstat(buf, &st)) {
1440             if (S_ISLNK(st.st_mode)) return 1;
1441         }
1442         else {
1443             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1444             return -1;
1445         }
1446     } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1447   #endif
1448 
1449     return 0;
1450 }
1451 
1452 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1453 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1454 	 * not any intermediate symlinks along the path)*/
1455 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1456 	if (fd >= 0) {
1457 		if (0 == fstat(fd, st)) {
1458 			return fd;
1459 		} else {
1460 			const int errnum = errno;
1461 			close(fd);
1462 			errno = errnum;
1463 		}
1464 	}
1465 	return -1;
1466 }
1467 
1468 /**
1469  * remove stat() from cache which haven't been stat()ed for
1470  * more than 2 seconds
1471  *
1472  *
1473  * walk though the stat-cache, collect the ids which are too old
1474  * and remove them in a second loop
1475  */
1476 
1477 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const unix_time64_t cur_ts) {
1478     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1479     if (t->left)
1480         stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1481     if (t->right)
1482         stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1483     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1484 
1485     const stat_cache_entry * const sce = t->data;
1486     if (cur_ts - sce->stat_ts > max_age)
1487         keys[(*ndx)++] = t->key;
1488 }
1489 
1490 static void stat_cache_periodic_cleanup(const time_t max_age, const unix_time64_t cur_ts) {
1491     splay_tree *sptree = sc.files;
1492     int max_ndx, i;
1493     int keys[8192]; /* 32k size on stack */
1494     do {
1495         if (!sptree) break;
1496         max_ndx = 0;
1497         stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1498         for (i = 0; i < max_ndx; ++i) {
1499             int ndx = keys[i];
1500             sptree = splaytree_splay(sptree, ndx);
1501             if (sptree && sptree->key == ndx) {
1502                 stat_cache_entry_free(sptree->data);
1503                 sptree = splaytree_delete(sptree, ndx);
1504             }
1505         }
1506     } while (max_ndx == sizeof(keys)/sizeof(int));
1507     sc.files = sptree;
1508 }
1509 
1510 void stat_cache_trigger_cleanup(void) {
1511 	time_t max_age = 2;
1512 
1513       #ifdef HAVE_FAM_H
1514 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1515 		if (log_monotonic_secs & 0x1F) return;
1516 		/* once every 32 seconds (0x1F == 31) */
1517 		max_age = 32;
1518 		fam_dir_periodic_cleanup();
1519 		/* By doing this before stat_cache_periodic_cleanup(),
1520 		 * entries used within the next max_age secs will remain
1521 		 * monitored, instead of effectively flushing and
1522 		 * rebuilding the FAM monitoring every max_age seconds */
1523 	}
1524       #endif
1525 
1526 	stat_cache_periodic_cleanup(max_age, log_monotonic_secs);
1527 }
1528