xref: /lighttpd1.4/src/stat_cache.c (revision cd738d4d)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "http_etag.h"
7 #include "algo_splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
/* Values stored in sc.stat_cache_engine.
 * FAM, inotify and kqueue are deliberately one and the same engine value;
 * which backend is compiled in is decided by the preprocessor below. */
enum {
    STAT_CACHE_ENGINE_SIMPLE  = 0, /*(default)*/
    STAT_CACHE_ENGINE_NONE    = 1,
    STAT_CACHE_ENGINE_FAM     = 2,
    STAT_CACHE_ENGINE_INOTIFY = STAT_CACHE_ENGINE_FAM, /* alias of FAM */
    STAT_CACHE_ENGINE_KQUEUE  = STAT_CACHE_ENGINE_FAM  /* alias of FAM */
};
48 
49 struct stat_cache_fam;  /* declaration */
50 
51 typedef struct stat_cache {
52 	int stat_cache_engine;
53 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
54 	struct stat_cache_fam *scf;
55 } stat_cache;
56 
57 static stat_cache sc;
58 
59 
60 static void * stat_cache_sptree_find(splay_tree ** const sptree,
61                                      const char * const name,
62                                      uint32_t len)
63 {
64     const int ndx = splaytree_djbhash(name, len);
65     *sptree = splaytree_splay(*sptree, ndx);
66     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
67 }
68 
69 
70 #if defined(HAVE_SYS_INOTIFY_H) \
71  || (defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
72 #ifndef HAVE_FAM_H
73 #define HAVE_FAM_H
74 #endif
75 #endif
76 
77 #ifdef HAVE_FAM_H
78 
79 /* monitor changes in directories using FAM
80  *
81  * This implementation employing FAM monitors directories as they are used,
82  * and maintains a reference count for cache use within stat_cache.c.
83  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
84  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
85  * within the cache are checked against the filesystem upon use if last stat()
86  * was greater than or equal to 16 seconds ago.
87  *
88  * This implementation does not monitor every directory in a tree, and therefore
89  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
90  * processing events from FAM might also lead to stale cache entries.
91  *
92  * For many websites, a large number of files are seldom, if ever, modified,
93  * and a common practice with images is to create a new file with a new name
94  * when a new version is needed, in order for client browsers and CDNs to better
95  * cache the content.  Given this, most use will see little difference in
96  * performance between server.stat-cache-engine = "fam" and "simple" (default).
97  * The default server.stat-cache-engine = "simple" calls stat() on a target once
98  * per second, and reuses that information until the next second.  For use where
99  * changes must be immediately visible, server.stat-cache-engine = "disable"
100  * should be used.
101  *
102  * When considering use of server.stat-cache-engine = "fam", there are a few
103  * additional limitations for this cache implementation using FAM.
104  * - symlinks to files located outside of the current directory do not result
105  *   in changes to that file being monitored (unless that file is in a directory
106  *   which is monitored as a result of a different request).  symlinks can be
107  *   chained and can be circular.  This implementation *does not* readlink() or
108  *   realpath() to resolve the chains to find and monitor the ultimate target
109  *   directory.  While symlinks to files located outside the current directory
110  *   are not monitored, symlinks to directories *are* monitored, though chains
111  *   of symlinks to directories do not result in monitoring of the directories
112  *   containing intermediate symlinks to the target directory.
113  * - directory rename of a directory which is not currently being monitored will
114  *   result in stale information in the cache if there is a subdirectory that is
115  *   being monitored.
116  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
117  * does re-validate the information in the cache upon use if the cache entry has
118  * not been checked in 16 seconds, so that is the upper limit for use of stale
119  * data.
120  *
121  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
122  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
123  * to the overhead of processing the additional noise generated from changes.
124  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
125  * untrusted files where a malicious user could generate an excess of change
126  * events.
127  *
128  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
129  * event is received for a directory (or symlink to a directory) which has been
 * deleted or renamed.  The splaytree data structure is suboptimal for frequent
 * changes of large directory trees where a large number of different files
 * have recently been accessed and are part of the stat_cache.
133  */
134 
135 #if defined(HAVE_SYS_INOTIFY_H) \
136  && !(defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
137 
138 #include <sys/inotify.h>
139 
140 /*(translate FAM API to inotify; this is specific to stat_cache.c use of FAM)*/
/* scf->fam is spelled scf->fd when inotify stands in for FAM */
#define fam fd
/* an inotify watch descriptor plays the role of the FAM request */
typedef int FAMRequest; /*(fr)*/
#define FAMClose(fd) close(*(fd))
#define FAMCancelMonitor(fd, wd) inotify_rm_watch(*(fd), *(wd))
/*(note: follows symlinks; not providing IN_DONT_FOLLOW)*/
#define fam_watch_mask ( IN_ATTRIB | IN_CREATE | IN_DELETE | IN_DELETE_SELF \
                       | IN_MODIFY | IN_MOVE_SELF | IN_MOVED_FROM \
                       | IN_EXCL_UNLINK | IN_ONLYDIR )
/* stores the new watch descriptor in *wd; evaluates to non-zero on failure
 * (userData is accepted for FAM API compatibility and not used) */
#define FAMMonitorDirectory(fd, fn, wd, userData) \
        ((*(wd) = inotify_add_watch(*(fd), (fn), (fam_watch_mask))) < 0)
/*(values copied from fam.h; any distinct values would do)*/
typedef enum FAMCodes {
    FAMChanged = 1,
    FAMDeleted = 2,
    FAMCreated = 5,
    FAMMoved   = 6
} FAMCodes;
159 
160 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
161 #undef HAVE_SYS_INOTIFY_H
162 
163 #include <sys/event.h>
164 #include <sys/time.h>
165 
/*(translate FAM API to kqueue; this is specific to stat_cache.c use of FAM)*/
167 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
168 typedef int FAMRequest; /*(fr)*/
169 #define FAMClose(fd) \
170         (-1 != (*(fd)) ? close(*(fd)) : 0)
171 static int FAMCancelMonitor (const int * const fd, int * const wd)
172 {
173     if (-1 == *fd) return 0;
174     if (-1 == *wd) return 0;
175     struct timespec t0 = { 0, 0 };
176     struct kevent kev;
177     EV_SET(&kev, *wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
178     int rc = kevent(*fd, &kev, 1, NULL, 0, &t0);
179     close(*wd);
180     *wd = -1;
181     return rc;
182 }
183 static int FAMMonitorDirectory (int * const fd, char * const fn, int * const wd, void * const userData)
184 {
185     *wd = fdevent_open_dirname(fn, 1); /*(note: follows symlinks)*/
186     if (-1 == *wd) return -1;
187     struct timespec t0 = { 0, 0 };
188     struct kevent kev;
189     unsigned short kev_flags = EV_ADD | EV_ENABLE | EV_CLEAR;
190     unsigned int kev_fflags = NOTE_ATTRIB | NOTE_EXTEND | NOTE_LINK | NOTE_WRITE
191                             | NOTE_DELETE | NOTE_REVOKE | NOTE_RENAME;
192     EV_SET(&kev, *wd, EVFILT_VNODE, kev_flags, kev_fflags, 0, userData);
193     return kevent(*fd, &kev, 1, NULL, 0, &t0);
194 }
/*(values copied from fam.h; any distinct values would do)*/
typedef enum FAMCodes {
    FAMChanged = 1,
    FAMDeleted = 2,
    FAMCreated = 5,
    FAMMoved   = 6
} FAMCodes;
201 
202 #else
203 
204 #include <fam.h>
205 
206 #ifdef HAVE_FAMNOEXISTS
207 #ifndef LIGHTTPD_STATIC
208 #ifdef HAVE_DLFCN_H
209 #include <dlfcn.h>
210 #endif
211 #endif
212 #endif
213 
214 #endif
215 
216 typedef struct fam_dir_entry {
217 	buffer name;
218 	int refcnt;
219 	FAMRequest req;
220 	unix_time64_t stat_ts;
221 	dev_t st_dev;
222 	ino_t st_ino;
223 	struct fam_dir_entry *fam_parent;
224 } fam_dir_entry;
225 
226 typedef struct stat_cache_fam {
227 	splay_tree *dirs; /* indexed by path; node data is fam_dir_entry */
228   #ifdef HAVE_SYS_INOTIFY_H
229 	splay_tree *wds;  /* indexed by inotify watch descriptor */
230   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
231   #else
232 	FAMConnection fam;
233   #endif
234 	log_error_st *errh;
235 	fdevents *ev;
236 	fdnode *fdn;
237 	int fd;
238 } stat_cache_fam;
239 
240 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
241 {
242     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
243     force_assert(NULL != fam_dir);
244 
245     buffer_copy_string_len(&fam_dir->name, name, len);
246     fam_dir->refcnt = 0;
247   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
248     fam_dir->req = -1;
249   #endif
250 
251     return fam_dir;
252 }
253 
254 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
255 {
256     if (!fam_dir) return;
257     /*(fam_dir->fam_parent might be invalid pointer here; ignore)*/
258     free(fam_dir->name.ptr);
259   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
260     if (-1 != fam_dir->req)
261         close(fam_dir->req);
262   #endif
263     free(fam_dir);
264 }
265 
266 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
267 {
268     fam_dir->stat_ts = 0;
269     if (fam_dir->fam_parent) {
270         --fam_dir->fam_parent->refcnt;
271         fam_dir->fam_parent = NULL;
272     }
273 }
274 
275 /*
 * walk through splay_tree and collect contents of dir tree.
 * remove tagged entries in a second loop
278  */
279 
280 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
281 {
282     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
283     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
284     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
285     if (*ndx == 512) return; /*(must match num array entries in keys[])*/
286 
287     fam_dir_entry * const fam_dir = t->data;
288     if (0 == fam_dir->refcnt) {
289         fam_dir_invalidate_node(fam_dir);
290         keys[(*ndx)++] = t->key;
291     }
292 }
293 
294 __attribute_noinline__
295 static void fam_dir_periodic_cleanup() {
296     stat_cache_fam * const scf = sc.scf;
297     int max_ndx, i;
298     int keys[512]; /* 2k size on stack */
299   #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
300     struct kevent kevl[512]; /* 32k size on stack to batch kevent EV_DELETE */
301   #endif
302     do {
303         if (!scf->dirs) break;
304         max_ndx = 0;
305         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
306         for (i = 0; i < max_ndx; ++i) {
307             const int ndx = keys[i];
308             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
309             if (node && node->key == ndx) {
310                 fam_dir_entry *fam_dir = node->data;
311                 scf->dirs = splaytree_delete(scf->dirs, ndx);
312               #ifdef HAVE_SYS_INOTIFY_H
313                 scf->wds = splaytree_delete(scf->wds, fam_dir->req);
314               #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
315                 /* batch process kevent removal; defer cancel */
316                 EV_SET(kevl+i, fam_dir->req, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
317                 fam_dir->req = -1; /*(make FAMCancelMonitor() a no-op)*/
318               #endif
319                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
320                 fam_dir_entry_free(fam_dir);
321             }
322         }
323       #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
324         /* batch process: kevent() to submit EV_DELETE, then close dir fds */
325         if (0 == max_ndx) break;
326         struct timespec t0 = { 0, 0 };
327         kevent(scf->fd, kevl, max_ndx, NULL, 0, &t0);
328         for (i = 0; i < max_ndx; ++i)
329             close((int)kevl[i].ident);
330       #endif
331     } while (max_ndx == sizeof(keys)/sizeof(int));
332 }
333 
334 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
335 {
336   #ifdef __clang_analyzer__
337     force_assert(name);
338   #endif
339     /*force_assert(t);*/
340     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
341     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
342 
343     fam_dir_entry * const fam_dir = t->data;
344   #ifdef __clang_analyzer__
345     force_assert(fam_dir);
346   #endif
347     const buffer * const b = &fam_dir->name;
348     size_t blen = buffer_clen(b);
349     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
350         fam_dir_invalidate_node(fam_dir);
351 }
352 
353 /* declarations */
354 static void stat_cache_delete_tree(const char *name, uint32_t len);
355 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
356 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry * const fam_dir, const char * const fn, const uint32_t fnlen, int code);
357 
358 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
359 {
360   #ifdef HAVE_SYS_INOTIFY_H
361     /*(inotify pads in->len to align struct following in->name[])*/
362     char buf[4096]
363       __attribute__ ((__aligned__(__alignof__(struct inotify_event))));
364     int rd;
365     do {
366         rd = (int)read(scf->fd, buf, sizeof(buf));
367         if (rd <= 0) {
368             if (-1 == rd && errno != EINTR && errno != EAGAIN) {
369                 log_perror(scf->errh, __FILE__, __LINE__, "inotify error");
370                 /* TODO: could flush cache, close scf->fd, and re-open inotify*/
371             }
372             break;
373         }
374         for (int i = 0; i < rd; ) {
375             struct inotify_event * const in =
376               (struct inotify_event *)((uintptr_t)buf + i);
377             uint32_t len = in->len;
378             if (len > sizeof(buf)) break; /*(should not happen)*/
379             i += sizeof(struct inotify_event) + len;
380             if (i > rd) break; /*(should not happen (partial record))*/
381             if (in->mask & IN_CREATE)
382                 continue; /*(see comment below for FAMCreated)*/
383             if (in->mask & IN_Q_OVERFLOW) {
384                 log_error(scf->errh, __FILE__, __LINE__,
385                           "inotify queue overflow");
386                 continue;
387             }
388             /* ignore events which may have been pending for
389              * paths recently cancelled via FAMCancelMonitor() */
390             scf->wds = splaytree_splay(scf->wds, in->wd);
391             if (!scf->wds || scf->wds->key != in->wd)
392                 continue;
393             fam_dir_entry *fam_dir = scf->wds->data;
394             if (NULL == fam_dir)        /*(should not happen)*/
395                 continue;
396             if (fam_dir->req != in->wd) /*(should not happen)*/
397                 continue;
398             /*(specific to use here in stat_cache.c)*/
399             int code = 0;
400             if (in->mask & (IN_ATTRIB | IN_MODIFY))
401                 code = FAMChanged;
402             else if (in->mask & (IN_DELETE | IN_DELETE_SELF | IN_UNMOUNT))
403                 code = FAMDeleted;
404             else if (in->mask & (IN_MOVE_SELF | IN_MOVED_FROM))
405                 code = FAMMoved;
406 
407             if (len) {
408                 do { --len; } while (len && in->name[len-1] == '\0');
409             }
410             stat_cache_handle_fdevent_fn(scf, fam_dir, in->name, len, code);
411         }
412     } while (rd + sizeof(struct inotify_event) + NAME_MAX + 1 > sizeof(buf));
413   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
414     struct kevent kevl[256];
415     struct timespec t0 = { 0, 0 };
416     int n;
417     do {
418         n = kevent(scf->fd, NULL, 0, kevl, sizeof(kevl)/sizeof(*kevl), &t0);
419         if (n <= 0) break;
420         for (int i = 0; i < n; ++i) {
421             const struct kevent * const kev = kevl+i;
422             /* ignore events which may have been pending for
423              * paths recently cancelled via FAMCancelMonitor() */
424             int ndx = (int)(intptr_t)kev->udata;
425             scf->dirs = splaytree_splay(scf->dirs, ndx);
426             if (!scf->dirs || scf->dirs->key != ndx)
427                 continue;
428             fam_dir_entry *fam_dir = scf->dirs->data;
429             if (fam_dir->req != (int)kev->ident)
430                 continue;
431             /*(specific to use here in stat_cache.c)*/
432             /* note: stat_cache only monitors on directories,
433              *       so events here are only on directories
434              * note: changes are treated as FAMDeleted since
435              *       it is unknown which file in dir was changed
436              *       This is not efficient, but this stat_cache mechanism also
437              *       should not be used on frequently modified directories. */
438             int code = 0;
439             if (kev->fflags & (NOTE_WRITE|NOTE_ATTRIB|NOTE_EXTEND|NOTE_LINK))
440                 code = FAMDeleted; /*(not FAMChanged; see comment above)*/
441             else if (kev->fflags & (NOTE_DELETE|NOTE_REVOKE))
442                 code = FAMDeleted;
443             else if (kev->fflags & NOTE_RENAME)
444                 code = FAMMoved;
445             if (kev->flags & EV_ERROR) /*(not expected; treat as FAMDeleted)*/
446                 code = FAMDeleted;
447             stat_cache_handle_fdevent_fn(scf, fam_dir, NULL, 0, code);
448         }
449     } while (n == sizeof(kevl)/sizeof(*kevl));
450   #else
451     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
452         FAMEvent fe;
453         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
454 
455         /* ignore events which may have been pending for
456          * paths recently cancelled via FAMCancelMonitor() */
457         ndx = (int)(intptr_t)fe.userdata;
458         scf->dirs = splaytree_splay(scf->dirs, ndx);
459         if (!scf->dirs || scf->dirs->key != ndx) {
460             continue;
461         }
462         fam_dir_entry *fam_dir = scf->dirs->data;
463         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
464             != FAMREQUEST_GETREQNUM(&fe.fr)) {
465             continue;
466         }
467 
468         uint32_t fnlen = (fe.code != FAMCreated && fe.filename[0] != '/')
469           ? (uint32_t)strlen(fe.filename)
470           : 0;
471         stat_cache_handle_fdevent_fn(scf, fam_dir, fe.filename, fnlen, fe.code);
472     }
473   #endif
474 }
475 
476 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry *fam_dir, const char * const fn, const uint32_t fnlen, int code)
477 {
478         if (fnlen) {
479             buffer * const n = &fam_dir->name;
480             fam_dir_entry *fam_link;
481             uint32_t len;
482             switch (code) {
483             case FAMCreated:
484                 /* file created in monitored dir modifies dir and
485                  * we should get a separate FAMChanged event for dir.
486                  * Therefore, ignore file FAMCreated event here.
487                  * Also, if FAMNoExists() is used, might get spurious
488                  * FAMCreated events as changes are made e.g. in monitored
489                  * sub-sub-sub dirs and the library discovers new (already
490                  * existing) dir entries */
491                 return;
492             case FAMChanged:
493                 /* file changed in monitored dir does not modify dir */
494             case FAMDeleted:
495             case FAMMoved:
496                 /* file deleted or moved in monitored dir modifies dir,
497                  * but FAM provides separate notification for that */
498 
499                 /* temporarily append filename to dir in fam_dir->name to
500                  * construct path, then delete stat_cache entry (if any)*/
501                 len = buffer_clen(n);
502                 buffer_append_path_len(n, fn, fnlen);
503                 /* (alternatively, could chose to stat() and update)*/
504                 stat_cache_invalidate_entry(BUF_PTR_LEN(n));
505 
506                 fam_link = /*(check if might be symlink to monitored dir)*/
507                 stat_cache_sptree_find(&scf->dirs, BUF_PTR_LEN(n));
508                 if (fam_link && !buffer_is_equal(&fam_link->name, n))
509                     fam_link = NULL;
510 
511                 buffer_truncate(n, len);
512 
513                 if (fam_link) {
514                     /* replaced symlink changes containing dir */
515                     stat_cache_invalidate_entry(n->ptr, len);
516                     /* handle symlink to dir as deleted dir below */
517                     code = FAMDeleted;
518                     fam_dir = fam_link;
519                     break;
520                 }
521                 return;
522             default:
523                 return;
524             }
525         }
526 
527         switch(code) {
528         case FAMChanged:
529             stat_cache_invalidate_entry(BUF_PTR_LEN(&fam_dir->name));
530             break;
531         case FAMDeleted:
532         case FAMMoved:
533             stat_cache_delete_tree(BUF_PTR_LEN(&fam_dir->name));
534             fam_dir_invalidate_node(fam_dir);
535             if (scf->dirs)
536                 fam_dir_invalidate_tree(scf->dirs,
537                                         BUF_PTR_LEN(&fam_dir->name));
538             fam_dir_periodic_cleanup();
539             break;
540         default:
541             break;
542         }
543 }
544 
545 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
546 {
547 	stat_cache_fam * const scf = ctx; /* sc.scf */
548 
549 	if (revent & FDEVENT_IN) {
550 		stat_cache_handle_fdevent_in(scf);
551 	}
552 
553 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
554 		/* fam closed the connection */
555 		log_error(scf->errh, __FILE__, __LINE__,
556 		  "FAM connection closed; disabling stat_cache.");
557 		/* (although effectively STAT_CACHE_ENGINE_NONE,
558 		 *  do not change here so that periodic jobs clean up memory)*/
559 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
560 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
561 		fdevent_unregister(scf->ev, scf->fd);
562 		scf->fdn = NULL;
563 
564 		FAMClose(&scf->fam);
565 		scf->fd = -1;
566 	}
567 
568 	return HANDLER_GO_ON;
569 }
570 
571 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
572 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
573 	force_assert(scf);
574 	scf->fd = -1;
575 	scf->ev = ev;
576 	scf->errh = errh;
577 
578   #ifdef HAVE_SYS_INOTIFY_H
579 	scf->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
580 	if (scf->fd < 0) {
581 		log_perror(errh, __FILE__, __LINE__, "inotify_init1()");
582 		free(scf);
583 		return NULL;
584 	}
585   #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
586    #ifdef __NetBSD__
587 	scf->fd = kqueue1(O_NONBLOCK|O_CLOEXEC|O_NOSIGPIPE);
588    #else
589 	scf->fd = kqueue();
590 	if (scf->fd >= 0) fdevent_setfd_cloexec(scf->fd);
591    #endif
592 	if (scf->fd < 0) {
593 		log_perror(errh, __FILE__, __LINE__, "kqueue()");
594 		free(scf);
595 		return NULL;
596 	}
597   #else
598 	/* setup FAM */
599 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
600 		log_error(errh, __FILE__, __LINE__,
601 		  "could not open a fam connection, dying.");
602 		free(scf);
603 		return NULL;
604 	}
605       #ifdef HAVE_FAMNOEXISTS
606       #ifdef LIGHTTPD_STATIC
607 	FAMNoExists(&scf->fam);
608       #else
609 	int (*FAMNoExists_fn)(FAMConnection *);
610 	FAMNoExists_fn =
611 	  (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
612 	if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
613       #endif
614       #endif
615 
616 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
617 	fdevent_setfd_cloexec(scf->fd);
618   #endif
619 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
620 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
621 
622 	return scf;
623 }
624 
625 static void stat_cache_free_fam(stat_cache_fam *scf) {
626 	if (NULL == scf) return;
627 
628       #ifdef HAVE_SYS_INOTIFY_H
629 	while (scf->wds) {
630 		splay_tree *node = scf->wds;
631 		scf->wds = splaytree_delete(scf->wds, node->key);
632 	}
633       #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
634 	/*(quicker cleanup to close kqueue() before cancel per entry)*/
635 	close(scf->fd);
636 	scf->fd = -1;
637       #endif
638 	while (scf->dirs) {
639 		/*(skip entry invalidation and FAMCancelMonitor())*/
640 		splay_tree *node = scf->dirs;
641 		fam_dir_entry_free((fam_dir_entry *)node->data);
642 		scf->dirs = splaytree_delete(scf->dirs, node->key);
643 	}
644 
645 	if (-1 != scf->fd) {
646 		/*scf->fdn already cleaned up in fdevent_free()*/
647 		FAMClose(&scf->fam);
648 		/*scf->fd = -1;*/
649 	}
650 
651 	free(scf);
652 }
653 
654 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
655 {
656     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
657     const int fn_is_dir = S_ISDIR(st->st_mode);
658     /*force_assert(0 != dirlen);*/
659     /*force_assert(fn[0] == '/');*/
660     /* consistency: ensure fn does not end in '/' unless root "/"
661      * FAM events will not end in '/', so easier to match this way */
662     if (fn[dirlen-1] == '/') --dirlen;
663     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
664     /* Note: paths are expected to be normalized before calling stat_cache,
665      * e.g. without repeated '/' */
666     if (!fn_is_dir) {
667         while (fn[--dirlen] != '/') ;
668         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
669     }
670     int dir_ndx = splaytree_djbhash(fn, dirlen);
671     fam_dir_entry *fam_dir = NULL;
672 
673     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
674     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
675         fam_dir = scf->dirs->data;
676         if (!buffer_eq_slen(&fam_dir->name, fn, dirlen)) {
677             /* hash collision; preserve existing
678              * do not monitor new to avoid cache thrashing */
679             return NULL;
680         }
681         /* directory already registered */
682     }
683 
684     const unix_time64_t cur_ts = log_monotonic_secs;
685     struct stat lst;
686     int ck_dir = fn_is_dir;
687     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
688         ck_dir = 1;
689         /*(temporarily modify fn)*/
690         fn[dirlen] = '\0';
691         if (0 != lstat(fn, &lst)) {
692             fn[dirlen] = '/';
693             return NULL;
694         }
695         if (!S_ISLNK(lst.st_mode)) {
696             st = &lst;
697         }
698         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
699             fn[dirlen] = '/';
700             return NULL;
701         }
702         fn[dirlen] = '/';
703     }
704 
705     int ck_lnk = (NULL == fam_dir);
706     if (ck_dir && NULL != fam_dir) {
707         /* check stat() matches device and inode, just in case an external event
708          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
709         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
710             ck_lnk = 1;
711             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
712              * fam_dir is not NULL and so splaytree_insert not called below)*/
713             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
714             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
715                 stat_cache_update_entry(fn, dirlen, st, NULL);
716             /*(must not delete tree since caller is holding a valid node)*/
717             stat_cache_invalidate_dir_tree(fn, dirlen);
718           #ifdef HAVE_SYS_INOTIFY_H
719             scf->wds = splaytree_delete(scf->wds, fam_dir->req);
720           #endif
721             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
722                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name.ptr,
723                                             &fam_dir->req,
724                                             (void *)(intptr_t)dir_ndx)) {
725                 fam_dir->stat_ts = 0; /* invalidate */
726                 return NULL;
727             }
728             fam_dir->st_dev = st->st_dev;
729             fam_dir->st_ino = st->st_ino;
730           #ifdef HAVE_SYS_INOTIFY_H
731             scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
732           #endif
733         }
734         fam_dir->stat_ts = cur_ts;
735     }
736 
737     if (NULL == fam_dir) {
738         fam_dir = fam_dir_entry_init(fn, dirlen);
739 
740         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name.ptr,&fam_dir->req,
741                                      (void *)(intptr_t)dir_ndx)) {
742           #if defined(HAVE_SYS_INOTIFY_H) \
743            || (defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE)
744             log_perror(scf->errh, __FILE__, __LINE__,
745               "monitoring dir failed: %s file: %s",
746               fam_dir->name.ptr, fn);
747           #else
748             log_error(scf->errh, __FILE__, __LINE__,
749               "monitoring dir failed: %s file: %s %s",
750               fam_dir->name.ptr, fn, FamErrlist[FAMErrno]);
751           #endif
752             fam_dir_entry_free(fam_dir);
753             return NULL;
754         }
755 
756         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
757       #ifdef HAVE_SYS_INOTIFY_H
758         scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
759       #endif
760         fam_dir->stat_ts= cur_ts;
761         fam_dir->st_dev = st->st_dev;
762         fam_dir->st_ino = st->st_ino;
763     }
764 
765     if (ck_lnk) {
766         if (fn_is_dir) {
767             /*(temporarily modify fn)*/
768             char e = fn[dirlen];
769             fn[dirlen] = '\0';
770             if (0 != lstat(fn, &lst)) {
771                 fn[dirlen] = e;
772                 return NULL;
773             }
774             fn[dirlen] = e;
775         }
776         if (fam_dir->fam_parent) {
777             --fam_dir->fam_parent->refcnt;
778             fam_dir->fam_parent = NULL;
779         }
780         if (S_ISLNK(lst.st_mode)) {
781             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
782         }
783     }
784 
785     ++fam_dir->refcnt;
786     return fam_dir;
787 }
788 
789 #endif
790 
791 
792 static stat_cache_entry * stat_cache_entry_init(void) {
793     stat_cache_entry *sce = calloc(1, sizeof(*sce));
794     force_assert(NULL != sce);
795     sce->fd = -1;
796     sce->refcnt = 1;
797     return sce;
798 }
799 
800 static void stat_cache_entry_free(void *data) {
801     stat_cache_entry *sce = data;
802     if (!sce) return;
803 
804     if (--sce->refcnt) return;
805 
806   #ifdef HAVE_FAM_H
807     /*(decrement refcnt only;
808      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
809     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
810   #endif
811 
812     free(sce->name.ptr);
813     free(sce->etag.ptr);
814     if (sce->content_type.size) free(sce->content_type.ptr);
815     if (sce->fd >= 0) close(sce->fd);
816 
817     free(sce);
818 }
819 
820 void stat_cache_entry_refchg(void *data, int mod) {
821     /*(expect mod == -1 or mod == 1)*/
822     stat_cache_entry * const sce = data;
823     if (mod < 0 && 1 == sce->refcnt)
824         stat_cache_entry_free(data);
825     else
826         sce->refcnt += mod;
827 }
828 
829 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
830 
831 static const char *attrname = "Content-Type";
832 static char attrval[128];
833 static buffer attrb = { attrval, 0, 0 };
834 
835 static int stat_cache_attr_get(const char *name) {
836   #if defined(HAVE_XATTR)
837    #if defined(HAVE_SYS_XATTR_H)
838     ssize_t attrlen;
839     if (0 < (attrlen = getxattr(name, attrname,
840                                 attrval, sizeof(attrval)-1)))
841    #else
842     int attrlen = sizeof(attrval)-1;
843     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
844    #endif
845   #elif defined(HAVE_EXTATTR)
846     ssize_t attrlen;
847     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
848                                         attrval, sizeof(attrval)-1)))
849   #endif
850     {
851         attrval[attrlen] = '\0';
852         attrb.used = (uint32_t)(attrlen + 1);
853         return 1;
854     }
855     return 0;
856 }
857 
858 #endif
859 
860 int stat_cache_init(fdevents *ev, log_error_st *errh) {
861   #ifdef HAVE_FAM_H
862     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
863         sc.scf = stat_cache_init_fam(ev, errh);
864         if (NULL == sc.scf) return 0;
865     }
866   #else
867     UNUSED(ev);
868     UNUSED(errh);
869   #endif
870 
871     return 1;
872 }
873 
874 void stat_cache_free(void) {
875     splay_tree *sptree = sc.files;
876     while (sptree) {
877         stat_cache_entry_free(sptree->data);
878         sptree = splaytree_delete(sptree, sptree->key);
879     }
880     sc.files = NULL;
881 
882   #ifdef HAVE_FAM_H
883     stat_cache_free_fam(sc.scf);
884     sc.scf = NULL;
885   #endif
886 
887   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
888     attrname = "Content-Type";
889   #endif
890 
891     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
892 }
893 
/* set name of the extended attribute consulted for mimetypes
 * (pointer is stored, not copied; no-op without xattr support) */
void stat_cache_xattrname (const char *name) {
  #if !defined(HAVE_XATTR) && !defined(HAVE_EXTATTR)
    UNUSED(name);
  #else
    attrname = name;
  #endif
}
901 
902 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
903     if (buffer_is_blank(stat_cache_string))
904         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
905     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
906         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
907 #ifdef HAVE_SYS_INOTIFY_H
908     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("inotify")))
909         sc.stat_cache_engine = STAT_CACHE_ENGINE_INOTIFY;
910         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_INOTIFY)*/
911 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
912     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("kqueue")))
913         sc.stat_cache_engine = STAT_CACHE_ENGINE_KQUEUE;
914         /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_KQUEUE)*/
915 #endif
916 #ifdef HAVE_FAM_H
917     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
918         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
919 #endif
920     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable"))
921              || buffer_eq_slen(stat_cache_string, CONST_STR_LEN("none")))
922         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
923     else {
924         log_error(errh, __FILE__, __LINE__,
925           "server.stat-cache-engine can be one of \"disable\", \"simple\","
926 #ifdef HAVE_SYS_INOTIFY_H
927           " \"inotify\","
928 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
929           " \"kqueue\","
930 #endif
931 #ifdef HAVE_FAM_H
932           " \"fam\","
933 #endif
934           " but not: %s", stat_cache_string->ptr);
935         return -1;
936     }
937     return 0;
938 }
939 
940 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
941 {
942     const char * const end = name + nlen; /*(end of string)*/
943     const uint32_t used = mimetypes->used;
944     if (used < 16) {
945         for (uint32_t i = 0; i < used; ++i) {
946             /* suffix match */
947             const data_string *ds = (data_string *)mimetypes->data[i];
948             const size_t klen = buffer_clen(&ds->key);
949             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
950                 return &ds->value;
951         }
952     }
953     else {
954         const char *s;
955         const data_string *ds;
956         if (nlen) {
957             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
958             if (*s == '/') ++s;
959         }
960         else {
961             s = name;
962         }
963         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
964         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
965         if (NULL != ds) return &ds->value;
966         while (++s < end) {
967             while (*s != '.' && ++s != end) ;
968             if (s == end) break;
969             /* search ".ext" then "ext" */
970             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
971             if (NULL != ds) return &ds->value;
972             /* repeat search without leading '.' to handle situation where
973              * admin configured mimetype.assign keys without leading '.' */
974             if (++s < end) {
975                 if (*s == '.') { --s; continue; }
976                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
977                 if (NULL != ds) return &ds->value;
978             }
979         }
980         /* search for ""; catchall */
981         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
982         if (NULL != ds) return &ds->value;
983     }
984 
985     return NULL;
986 }
987 
988 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
989 
990 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
991 {
992     return stat_cache_attr_get(name) ? &attrb : NULL;
993 }
994 
995 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
996 {
997     /*(invalid caching if user config has multiple, different
998      * r->conf.mimetypes for same extension (not expected))*/
999     if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1000 
1001     if (!S_ISREG(sce->st.st_mode)) return NULL;
1002 
1003     /* cache mimetype */
1004     const buffer *mtype =
1005       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
1006     if (NULL == mtype)
1007         mtype = stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1008     if (NULL != mtype) {
1009         if (sce->content_type.size) {
1010             buffer_copy_buffer(&sce->content_type, mtype);
1011         }
1012         else if (mtype == &attrb) {
1013             sce->content_type.ptr = NULL;
1014             buffer_copy_buffer(&sce->content_type, mtype);
1015         }
1016         else {
1017             /*(copy pointers from mimetypes array; avoid allocation)*/
1018             sce->content_type.ptr = mtype->ptr;
1019             sce->content_type.used = mtype->used;
1020             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1021         }
1022     }
1023     else
1024         buffer_clear(&sce->content_type);
1025 
1026     return &sce->content_type;
1027 }
1028 
1029 #else
1030 
1031 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
1032 {
1033     /*(invalid caching if user config has multiple, different
1034      * r->conf.mimetypes for same extension (not expected))*/
1035     if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1036 
1037     if (!S_ISREG(sce->st.st_mode)) return NULL;
1038 
1039     /* cache mimetype */
1040     const buffer * const mtype =
1041       stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1042     if (NULL != mtype) {
1043         /*(copy pointers from mimetypes array; avoid allocation)*/
1044         sce->content_type.ptr = mtype->ptr;
1045         sce->content_type.used = mtype->used;
1046         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1047     }
1048     else
1049         buffer_clear(&sce->content_type);
1050 
1051     return &sce->content_type;
1052 }
1053 
1054 #endif
1055 
1056 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
1057     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
1058      * for same path (not expected, since etag flags should be by filesystem))*/
1059     if (!buffer_is_blank(&sce->etag)) return &sce->etag;
1060 
1061     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
1062         if (0 == flags) return NULL;
1063         http_etag_create(&sce->etag, &sce->st, flags);
1064         return &sce->etag;
1065     }
1066 
1067     return NULL;
1068 }
1069 
/* compare the stat fields used to detect that a file changed:
 * modification time (including nanoseconds where available), size,
 * inode and device; returns 1 if all are equal, else 0 */
__attribute_pure__
static int stat_cache_stat_eq(const struct stat * const sta, const struct stat * const stb) {
  #ifdef st_mtime /* use high-precision timestamp if available */
  #if defined(__APPLE__) && defined(__MACH__)
    if (sta->st_mtimespec.tv_nsec != stb->st_mtimespec.tv_nsec) return 0;
  #else
    if (sta->st_mtim.tv_nsec != stb->st_mtim.tv_nsec) return 0;
  #endif
  #endif
    if (sta->st_mtime != stb->st_mtime) return 0;
    if (sta->st_size  != stb->st_size)  return 0;
    if (sta->st_ino   != stb->st_ino)   return 0;
    if (sta->st_dev   != stb->st_dev)   return 0;
    return 1;
}
1087 
1088 void stat_cache_update_entry(const char *name, uint32_t len,
1089                              struct stat *st, buffer *etagb)
1090 {
1091     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1092     force_assert(0 != len);
1093     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1094     splay_tree **sptree = &sc.files;
1095     stat_cache_entry *sce =
1096       stat_cache_sptree_find(sptree, name, len);
1097     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1098         if (!stat_cache_stat_eq(&sce->st, st)) {
1099             /* etagb might be NULL to clear etag (invalidate) */
1100             buffer_clear(&sce->etag);
1101             if (etagb)
1102                 buffer_copy_string_len(&sce->etag, BUF_PTR_LEN(etagb));
1103           #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1104             buffer_clear(&sce->content_type);
1105           #endif
1106             if (sce->fd >= 0) {
1107                 if (1 == sce->refcnt) {
1108                     close(sce->fd);
1109                     sce->fd = -1;
1110                 }
1111                 else {
1112                     --sce->refcnt; /* stat_cache_entry_free(sce); */
1113                     (*sptree)->data = sce = stat_cache_entry_init();
1114                     buffer_copy_string_len(&sce->name, name, len);
1115                 }
1116             }
1117             sce->st = *st;
1118         }
1119         sce->stat_ts = log_monotonic_secs;
1120     }
1121 }
1122 
1123 void stat_cache_delete_entry(const char *name, uint32_t len)
1124 {
1125     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1126     force_assert(0 != len);
1127     if (name[len-1] == '/') { if (0 == --len) len = 1; }
1128     splay_tree **sptree = &sc.files;
1129     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1130     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1131         stat_cache_entry_free(sce);
1132         *sptree = splaytree_delete(*sptree, (*sptree)->key);
1133     }
1134 }
1135 
1136 void stat_cache_invalidate_entry(const char *name, uint32_t len)
1137 {
1138     splay_tree **sptree = &sc.files;
1139     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1140     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1141         sce->stat_ts = 0;
1142       #ifdef HAVE_FAM_H
1143         if (sce->fam_dir != NULL) {
1144             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1145             sce->fam_dir = NULL;
1146         }
1147       #endif
1148     }
1149 }
1150 
1151 #ifdef HAVE_FAM_H
1152 
1153 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
1154                                                 const char *name, size_t len)
1155 {
1156     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
1157     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
1158 
1159     const buffer * const b = &((stat_cache_entry *)t->data)->name;
1160     const size_t blen = buffer_clen(b);
1161     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
1162         stat_cache_entry *sce = t->data;
1163         sce->stat_ts = 0;
1164         if (sce->fam_dir != NULL) {
1165             --((fam_dir_entry *)sce->fam_dir)->refcnt;
1166             sce->fam_dir = NULL;
1167         }
1168     }
1169 }
1170 
1171 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
1172 {
1173     splay_tree * const sptree = sc.files;
1174     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
1175 }
1176 
1177 #endif
1178 
/*
 * walk through splay_tree and collect keys of entries below a directory;
 * tagged entries are removed in a second loop
 */
1183 
1184 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
1185                                     int *keys, int *ndx)
1186 {
1187     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1188     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
1189     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
1190     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1191 
1192     const buffer * const b = &((stat_cache_entry *)t->data)->name;
1193     const size_t blen = buffer_clen(b);
1194     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
1195         keys[(*ndx)++] = t->key;
1196 }
1197 
1198 __attribute_noinline__
1199 static void stat_cache_prune_dir_tree(const char *name, size_t len)
1200 {
1201     splay_tree *sptree = sc.files;
1202     int max_ndx, i;
1203     int keys[8192]; /* 32k size on stack */
1204     do {
1205         if (!sptree) break;
1206         max_ndx = 0;
1207         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
1208         for (i = 0; i < max_ndx; ++i) {
1209             const int ndx = keys[i];
1210             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
1211             if (node && node->key == ndx) {
1212                 stat_cache_entry_free(node->data);
1213                 sptree = splaytree_delete(sptree, ndx);
1214             }
1215         }
1216     } while (max_ndx == sizeof(keys)/sizeof(int));
1217     sc.files = sptree;
1218 }
1219 
/* remove cache entry for path itself, then all entries below it */
static void stat_cache_delete_tree(const char *name, uint32_t len)
{
    /* entry for the path itself */
    stat_cache_delete_entry(name, len);
    /* entries whose path is name + '/' + ... */
    stat_cache_prune_dir_tree(name, len);
}
1225 
/* remove directory and everything cached below it from the stat cache;
 * with the FAM engine, also invalidate matching monitored directories
 * (one trailing '/' is ignored, except for root "/") */
void stat_cache_delete_dir(const char *name, uint32_t len)
{
    force_assert(0 != len);
    if ('/' == name[len-1] && len > 1) --len;
    stat_cache_delete_tree(name, len);
  #ifdef HAVE_FAM_H
    if (STAT_CACHE_ENGINE_FAM != sc.stat_cache_engine) return;

    splay_tree ** const dirs = &sc.scf->dirs;
    fam_dir_entry * const fam_dir = stat_cache_sptree_find(dirs, name, len);
    if (NULL != fam_dir && buffer_eq_slen(&fam_dir->name, name, len))
        fam_dir_invalidate_node(fam_dir);
    if (NULL != *dirs) fam_dir_invalidate_tree(*dirs, name, len);
    fam_dir_periodic_cleanup();
  #endif
}
1242 
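/***
 * look up the stat cache entry for an absolute path, calling stat()
 * and creating or refreshing the entry when needed
 *
 * returns:
 *  - pointer to the stat cache entry on success
 *  - NULL on failure -> see errno for problem
 *    (EINVAL if path does not begin with '/',
 *     ENOTDIR if path has trailing '/' but is not a directory,
 *     otherwise errno as set by failed stat())
 */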
1251 
1252 stat_cache_entry * stat_cache_get_entry(const buffer * const name) {
1253 	stat_cache_entry *sce = NULL;
1254 
1255 	/* consistency: ensure lookup name does not end in '/' unless root "/"
1256 	 * (but use full path given with stat(), even with trailing '/') */
1257 	int final_slash = 0;
1258 	size_t len = buffer_clen(name);
1259 	force_assert(0 != len);
1260 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
1261 	/* Note: paths are expected to be normalized before calling stat_cache,
1262 	 * e.g. without repeated '/' */
1263 
1264 	if (name->ptr[0] != '/') {
1265 		errno = EINVAL;
1266 		return NULL;
1267 	}
1268 
1269 	/*
1270 	 * check if the directory for this file has changed
1271 	 */
1272 
1273 	const unix_time64_t cur_ts = log_monotonic_secs;
1274 
1275 	const int file_ndx = splaytree_djbhash(name->ptr, len);
1276 	splay_tree *sptree = sc.files = splaytree_splay(sc.files, file_ndx);
1277 
1278 	if (sptree && (sptree->key == file_ndx)) {
1279 		/* we have seen this file already and
1280 		 * don't stat() it again in the same second */
1281 
1282 		sce = sptree->data;
1283 
1284 		/* check if the name is the same, we might have a collision */
1285 
1286 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
1287 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
1288 				if (sce->stat_ts == cur_ts) {
1289 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1290 						errno = ENOTDIR;
1291 						return NULL;
1292 					}
1293 					return sce;
1294 				}
1295 			}
1296 		      #ifdef HAVE_FAM_H
1297 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
1298 				 && sce->fam_dir) { /* entry is in monitored dir */
1299 				/* re-stat() periodically, even if monitoring for changes
1300 				 * (due to limitations in stat_cache.c use of FAM)
1301 				 * (gaps due to not continually monitoring an entire tree) */
1302 				if (cur_ts - sce->stat_ts < 16) {
1303 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1304 						errno = ENOTDIR;
1305 						return NULL;
1306 					}
1307 					return sce;
1308 				}
1309 			}
1310 		      #endif
1311 		} else {
1312 			/* collision, forget about the entry */
1313 			sce = NULL;
1314 		}
1315 	}
1316 
1317 	struct stat st;
1318 	if (-1 == stat(name->ptr, &st)) {
1319 		return NULL;
1320 	}
1321 
1322 	if (NULL == sce) {
1323 
1324 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1325 		if (final_slash && S_ISREG(st.st_mode)) {
1326 			errno = ENOTDIR;
1327 			return NULL;
1328 		}
1329 
1330 		sce = stat_cache_entry_init();
1331 		buffer_copy_string_len(&sce->name, name->ptr, len);
1332 
1333 		/* already splayed file_ndx */
1334 		if (NULL != sptree && sptree->key == file_ndx) {
1335 			/* hash collision: replace old entry */
1336 			stat_cache_entry_free(sptree->data);
1337 			sptree->data = sce;
1338 		} else {
1339 			/*sptree =*/ sc.files = splaytree_insert(sptree, file_ndx, sce);
1340 		}
1341 
1342 	} else {
1343 
1344 		buffer_clear(&sce->etag);
1345 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1346 		buffer_clear(&sce->content_type);
1347 	      #endif
1348 
1349 		/* close fd if file changed */
1350 		if (sce->fd >= 0 && !stat_cache_stat_eq(&sce->st, &st)) {
1351 			if (1 == sce->refcnt) {
1352 				close(sce->fd);
1353 				sce->fd = -1;
1354 			}
1355 			else {
1356 				--sce->refcnt; /* stat_cache_entry_free(sce); */
1357 				sptree->data = sce = stat_cache_entry_init();
1358 				buffer_copy_string_len(&sce->name, name->ptr, len);
1359 			}
1360 		}
1361 	}
1362 
1363 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1364 
1365 #ifdef HAVE_FAM_H
1366 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1367 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1368 		sce->fam_dir =
1369 		  fam_dir_monitor(sc.scf, name->ptr, len, &st);
1370 	      #if 0 /*(performed below)*/
1371 		if (NULL != sce->fam_dir) {
1372 			/*(may have been invalidated by dir change)*/
1373 			sce->stat_ts = cur_ts;
1374 		}
1375 	      #endif
1376 	}
1377 #endif
1378 
1379 	sce->stat_ts = cur_ts;
1380 	return sce;
1381 }
1382 
1383 stat_cache_entry * stat_cache_get_entry_open(const buffer * const name, const int symlinks) {
1384     stat_cache_entry * const sce = stat_cache_get_entry(name);
1385     if (NULL == sce) return NULL;
1386     if (sce->fd >= 0) return sce;
1387     if (sce->st.st_size > 0) {
1388         sce->fd = stat_cache_open_rdonly_fstat(name, &sce->st, symlinks);
1389         buffer_clear(&sce->etag);
1390     }
1391     return sce; /* (note: sce->fd might still be -1 if open() failed) */
1392 }
1393 
1394 const stat_cache_st * stat_cache_path_stat (const buffer * const name) {
1395     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1396     return sce ? &sce->st : NULL;
1397 }
1398 
1399 int stat_cache_path_isdir(const buffer *name) {
1400     const stat_cache_entry * const sce = stat_cache_get_entry(name);
1401     return (sce && (S_ISDIR(sce->st.st_mode) ? 1 : (errno = ENOTDIR, 0)));
1402 }
1403 
1404 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1405     /* caller should check for symlinks only if we should block symlinks. */
1406 
1407     /* catch the obvious symlinks
1408      *
1409      * this is not a secure check as we still have a race-condition between
1410      * the stat() and the open. We can only solve this by
1411      * 1. open() the file
1412      * 2. fstat() the fd
1413      *
1414      * and keeping the file open for the rest of the time. But this can
1415      * only be done at network level.
1416      * */
1417 
1418   #ifdef HAVE_LSTAT
1419     /* we assume "/" can not be symlink,
1420      * so skip the symlink stuff if path is "/" */
1421     size_t len = buffer_clen(name);
1422     force_assert(0 != len);
1423     force_assert(name->ptr[0] == '/');
1424     if (1 == len) return 0;
1425    #ifndef PATH_MAX
1426    #define PATH_MAX 4096
1427    #endif
1428     if (len >= PATH_MAX) return -1;
1429 
1430     char buf[PATH_MAX];
1431     memcpy(buf, name->ptr, len);
1432     char *s_cur = buf+len;
1433     do {
1434         *s_cur = '\0';
1435         struct stat st;
1436         if (0 == lstat(buf, &st)) {
1437             if (S_ISLNK(st.st_mode)) return 1;
1438         }
1439         else {
1440             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1441             return -1;
1442         }
1443     } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1444   #endif
1445 
1446     return 0;
1447 }
1448 
1449 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1450 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1451 	 * not any intermediate symlinks along the path)*/
1452 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1453 	if (fd >= 0) {
1454 		if (0 == fstat(fd, st)) {
1455 			return fd;
1456 		} else {
1457 			const int errnum = errno;
1458 			close(fd);
1459 			errno = errnum;
1460 		}
1461 	}
1462 	return -1;
1463 }
1464 
/**
 * remove entries from cache which haven't been stat()ed for
 * more than max_age seconds (2 seconds by default; 32 seconds
 * when the FAM engine is in use)
 *
 *
 * walk through the stat-cache, collect the ids which are too old
 * and remove them in a second loop
 */
1473 
1474 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const unix_time64_t cur_ts) {
1475     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1476     if (t->left)
1477         stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1478     if (t->right)
1479         stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1480     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1481 
1482     const stat_cache_entry * const sce = t->data;
1483     if (cur_ts - sce->stat_ts > max_age)
1484         keys[(*ndx)++] = t->key;
1485 }
1486 
1487 static void stat_cache_periodic_cleanup(const time_t max_age, const unix_time64_t cur_ts) {
1488     splay_tree *sptree = sc.files;
1489     int max_ndx, i;
1490     int keys[8192]; /* 32k size on stack */
1491     do {
1492         if (!sptree) break;
1493         max_ndx = 0;
1494         stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1495         for (i = 0; i < max_ndx; ++i) {
1496             int ndx = keys[i];
1497             sptree = splaytree_splay(sptree, ndx);
1498             if (sptree && sptree->key == ndx) {
1499                 stat_cache_entry_free(sptree->data);
1500                 sptree = splaytree_delete(sptree, ndx);
1501             }
1502         }
1503     } while (max_ndx == sizeof(keys)/sizeof(int));
1504     sc.files = sptree;
1505 }
1506 
1507 void stat_cache_trigger_cleanup(void) {
1508 	time_t max_age = 2;
1509 
1510       #ifdef HAVE_FAM_H
1511 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1512 		if (log_monotonic_secs & 0x1F) return;
1513 		/* once every 32 seconds (0x1F == 31) */
1514 		max_age = 32;
1515 		fam_dir_periodic_cleanup();
1516 		/* By doing this before stat_cache_periodic_cleanup(),
1517 		 * entries used within the next max_age secs will remain
1518 		 * monitored, instead of effectively flushing and
1519 		 * rebuilding the FAM monitoring every max_age seconds */
1520 	}
1521       #endif
1522 
1523 	stat_cache_periodic_cleanup(max_age, log_monotonic_secs);
1524 }
1525