xref: /lighttpd1.4/src/stat_cache.c (revision 2cdb8627)
1 #include "first.h"
2 
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "etag.h"
7 #include "splaytree.h"
8 
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23 
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27 
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34 
35 /*
36  * stat-cache
37  *
38  * - a splay-tree is used as we can use the caching effect of it
39  */
40 
41 enum {
42   STAT_CACHE_ENGINE_SIMPLE, /*(default)*/
43   STAT_CACHE_ENGINE_NONE,
44   STAT_CACHE_ENGINE_FAM
45 };
46 
47 struct stat_cache_fam;  /* declaration */
48 
49 typedef struct stat_cache {
50 	int stat_cache_engine;
51 	splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
52 	struct stat_cache_fam *scf;
53 } stat_cache;
54 
55 static stat_cache sc;
56 
57 
58 static void * stat_cache_sptree_find(splay_tree ** const sptree,
59                                      const char * const name,
60                                      uint32_t len)
61 {
62     const int ndx = splaytree_djbhash(name, len);
63     *sptree = splaytree_splay(*sptree, ndx);
64     return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
65 }
66 
67 
68 #ifdef HAVE_FAM_H
69 
70 /* monitor changes in directories using FAM
71  *
72  * This implementation employing FAM monitors directories as they are used,
73  * and maintains a reference count for cache use within stat_cache.c.
74  * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
75  * in last 64 seconds out of the cache and cancelling FAM monitoring.  Items
76  * within the cache are checked against the filesystem upon use if last stat()
77  * was greater than or equal to 16 seconds ago.
78  *
79  * This implementation does not monitor every directory in a tree, and therefore
80  * the cache may get out-of-sync with the filesystem.  Delays in receiving and
81  * processing events from FAM might also lead to stale cache entries.
82  *
83  * For many websites, a large number of files are seldom, if ever, modified,
84  * and a common practice with images is to create a new file with a new name
85  * when a new version is needed, in order for client browsers and CDNs to better
86  * cache the content.  Given this, most use will see little difference in
87  * performance between server.stat-cache-engine = "fam" and "simple" (default).
88  * The default server.stat-cache-engine = "simple" calls stat() on a target once
89  * per second, and reuses that information until the next second.  For use where
90  * changes must be immediately visible, server.stat-cache-engine = "disable"
91  * should be used.
92  *
93  * When considering use of server.stat-cache-engine = "fam", there are a few
94  * additional limitations for this cache implementation using FAM.
95  * - symlinks to files located outside of the current directory do not result
96  *   in changes to that file being monitored (unless that file is in a directory
97  *   which is monitored as a result of a different request).  symlinks can be
98  *   chained and can be circular.  This implementation *does not* readlink() or
99  *   realpath() to resolve the chains to find and monitor the ultimate target
100  *   directory.  While symlinks to files located outside the current directory
101  *   are not monitored, symlinks to directories *are* monitored, though chains
102  *   of symlinks to directories do not result in monitoring of the directories
103  *   containing intermediate symlinks to the target directory.
104  * - directory rename of a directory which is not currently being monitored will
105  *   result in stale information in the cache if there is a subdirectory that is
106  *   being monitored.
107  * Even though lighttpd will not receive FAM events in the above cases, lighttpd
108  * does re-validate the information in the cache upon use if the cache entry has
109  * not been checked in 16 seconds, so that is the upper limit for use of stale
110  * data.
111  *
112  * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
113  * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
114  * to the overhead of processing the additional noise generated from changes.
115  * Related, server.stat-cache-engine = "fam" is not recommended on trees of
116  * untrusted files where a malicious user could generate an excess of change
117  * events.
118  *
119  * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
120  * event is received for a directory (or symlink to a directory) which has been
121  * deleted or renamed.  The splaytree data structure is suboptimal for frequent
122  * changes of large directories trees where there have been a large number of
123  * different files recently accessed and part of the stat_cache.
124  */
125 
126 #include <fam.h>
127 
128 #ifdef HAVE_FAMNOEXISTS
129 #ifndef LIGHTTPD_STATIC
130 #include <dlfcn.h>
131 #endif
132 #endif
133 
134 typedef struct fam_dir_entry {
135 	buffer *name;
136 	int refcnt;
137 	FAMRequest req;
138 	time_t stat_ts;
139 	dev_t st_dev;
140 	ino_t st_ino;
141 	struct fam_dir_entry *fam_parent;
142 } fam_dir_entry;
143 
144 typedef struct stat_cache_fam {
145 	splay_tree *dirs; /* the nodes of the tree are fam_dir_entry */
146 	FAMConnection fam;
147 	log_error_st *errh;
148 	fdevents *ev;
149 	fdnode *fdn;
150 	int fd;
151 } stat_cache_fam;
152 
153 static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
154 {
155     fam_dir_entry * const fam_dir = calloc(1, sizeof(*fam_dir));
156     force_assert(NULL != fam_dir);
157 
158     fam_dir->name = buffer_init();
159     buffer_copy_string_len(fam_dir->name, name, len);
160     fam_dir->refcnt = 0;
161 
162     return fam_dir;
163 }
164 
165 static void fam_dir_entry_free(fam_dir_entry *fam_dir)
166 {
167     if (!fam_dir) return;
168     /*(fam_dir->parent might be invalid pointer here; ignore)*/
169     buffer_free(fam_dir->name);
170     free(fam_dir);
171 }
172 
173 static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
174 {
175     fam_dir->stat_ts = 0;
176     if (fam_dir->fam_parent) {
177         --fam_dir->fam_parent->refcnt;
178         fam_dir->fam_parent = NULL;
179     }
180 }
181 
182 /*
183  * walk though splay_tree and collect contents of dir tree.
184  * remove tagged entries in a second loop
185  */
186 
187 static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
188 {
189     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
190     if (t->left)  fam_dir_tag_refcnt(t->left,  keys, ndx);
191     if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
192     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
193 
194     fam_dir_entry * const fam_dir = t->data;
195     if (0 == fam_dir->refcnt) {
196         fam_dir_invalidate_node(fam_dir);
197         keys[(*ndx)++] = t->key;
198     }
199 }
200 
201 __attribute_noinline__
202 static void fam_dir_periodic_cleanup() {
203     stat_cache_fam * const scf = sc.scf;
204     int max_ndx, i;
205     int keys[8192]; /* 32k size on stack */
206     do {
207         if (!scf->dirs) break;
208         max_ndx = 0;
209         fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
210         for (i = 0; i < max_ndx; ++i) {
211             const int ndx = keys[i];
212             splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
213             if (node && node->key == ndx) {
214                 fam_dir_entry *fam_dir = node->data;
215                 scf->dirs = splaytree_delete(scf->dirs, ndx);
216                 FAMCancelMonitor(&scf->fam, &fam_dir->req);
217                 fam_dir_entry_free(fam_dir);
218             }
219         }
220     } while (max_ndx == sizeof(keys)/sizeof(int));
221 }
222 
223 static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
224 {
225   #ifdef __clang_analyzer__
226     force_assert(name);
227   #endif
228     /*force_assert(t);*/
229     if (t->left)  fam_dir_invalidate_tree(t->left,  name, len);
230     if (t->right) fam_dir_invalidate_tree(t->right, name, len);
231 
232     fam_dir_entry * const fam_dir = t->data;
233   #ifdef __clang_analyzer__
234     force_assert(fam_dir);
235   #endif
236     buffer *b = fam_dir->name;
237     size_t blen = buffer_string_length(b);
238     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
239         fam_dir_invalidate_node(fam_dir);
240 }
241 
242 /* declarations */
243 static void stat_cache_delete_tree(const char *name, uint32_t len);
244 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
245 
246 static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
247 {
248     for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
249         FAMEvent fe;
250         if (FAMNextEvent(&scf->fam, &fe) < 0) break;
251 
252         /* ignore events which may have been pending for
253          * paths recently cancelled via FAMCancelMonitor() */
254         ndx = (int)(intptr_t)fe.userdata;
255         scf->dirs = splaytree_splay(scf->dirs, ndx);
256         if (!scf->dirs || scf->dirs->key != ndx) {
257             continue;
258         }
259         fam_dir_entry *fam_dir = scf->dirs->data;
260         if (FAMREQUEST_GETREQNUM(&fam_dir->req)
261             != FAMREQUEST_GETREQNUM(&fe.fr)) {
262             continue;
263         }
264 
265         if (fe.filename[0] != '/') {
266             buffer * const n = fam_dir->name;
267             fam_dir_entry *fam_link;
268             size_t len;
269             switch(fe.code) {
270             case FAMCreated:
271                 /* file created in monitored dir modifies dir and
272                  * we should get a separate FAMChanged event for dir.
273                  * Therefore, ignore file FAMCreated event here.
274                  * Also, if FAMNoExists() is used, might get spurious
275                  * FAMCreated events as changes are made e.g. in monitored
276                  * sub-sub-sub dirs and the library discovers new (already
277                  * existing) dir entries */
278                 continue;
279             case FAMChanged:
280                 /* file changed in monitored dir does not modify dir */
281             case FAMDeleted:
282             case FAMMoved:
283                 /* file deleted or moved in monitored dir modifies dir,
284                  * but FAM provides separate notification for that */
285 
286                 /* temporarily append filename to dir in fam_dir->name to
287                  * construct path, then delete stat_cache entry (if any)*/
288                 len = buffer_string_length(n);
289                 buffer_append_string_len(n, CONST_STR_LEN("/"));
290                 buffer_append_string_len(n,fe.filename,strlen(fe.filename));
291                 /* (alternatively, could chose to stat() and update)*/
292                 stat_cache_invalidate_entry(CONST_BUF_LEN(n));
293 
294                 fam_link = /*(check if might be symlink to monitored dir)*/
295                   stat_cache_sptree_find(&scf->dirs, CONST_BUF_LEN(n));
296                 if (fam_link && !buffer_is_equal(fam_link->name, n))
297                     fam_link = NULL;
298 
299                 buffer_string_set_length(n, len);
300 
301                 if (fam_link) {
302                     /* replaced symlink changes containing dir */
303                     stat_cache_invalidate_entry(CONST_BUF_LEN(n));
304                     /* handle symlink to dir as deleted dir below */
305                     fe.code = FAMDeleted;
306                     fam_dir = fam_link;
307                     break;
308                 }
309                 continue;
310             default:
311                 continue;
312             }
313         }
314 
315         switch(fe.code) {
316         case FAMChanged:
317             stat_cache_invalidate_entry(CONST_BUF_LEN(fam_dir->name));
318             break;
319         case FAMDeleted:
320         case FAMMoved:
321             stat_cache_delete_tree(CONST_BUF_LEN(fam_dir->name));
322             fam_dir_invalidate_node(fam_dir);
323             if (scf->dirs)
324                 fam_dir_invalidate_tree(scf->dirs,CONST_BUF_LEN(fam_dir->name));
325             fam_dir_periodic_cleanup();
326             break;
327         default:
328             break;
329         }
330     }
331 }
332 
333 static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
334 {
335 	stat_cache_fam * const scf = ctx; /* sc.scf */
336 
337 	if (revent & FDEVENT_IN) {
338 		stat_cache_handle_fdevent_in(scf);
339 	}
340 
341 	if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
342 		/* fam closed the connection */
343 		log_error(scf->errh, __FILE__, __LINE__,
344 		  "FAM connection closed; disabling stat_cache.");
345 		/* (although effectively STAT_CACHE_ENGINE_NONE,
346 		 *  do not change here so that periodic jobs clean up memory)*/
347 		/*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
348 		fdevent_fdnode_event_del(scf->ev, scf->fdn);
349 		fdevent_unregister(scf->ev, scf->fd);
350 		scf->fdn = NULL;
351 
352 		FAMClose(&scf->fam);
353 		scf->fd = -1;
354 	}
355 
356 	return HANDLER_GO_ON;
357 }
358 
359 static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
360 	stat_cache_fam *scf = calloc(1, sizeof(*scf));
361 	force_assert(scf);
362 	scf->fd = -1;
363 	scf->ev = ev;
364 	scf->errh = errh;
365 
366 	/* setup FAM */
367 	if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
368 		log_error(errh, __FILE__, __LINE__,
369 		  "could not open a fam connection, dying.");
370 		return NULL;
371 	}
372       #ifdef HAVE_FAMNOEXISTS
373       #ifdef LIGHTTPD_STATIC
374 	FAMNoExists(&scf->fam);
375       #else
376 	int (*FAMNoExists_fn)(FAMConnection *);
377 	FAMNoExists_fn =
378 	  (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
379 	if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
380       #endif
381       #endif
382 
383 	scf->fd = FAMCONNECTION_GETFD(&scf->fam);
384 	fdevent_setfd_cloexec(scf->fd);
385 	scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
386 	fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
387 
388 	return scf;
389 }
390 
391 static void stat_cache_free_fam(stat_cache_fam *scf) {
392 	if (NULL == scf) return;
393 
394 	while (scf->dirs) {
395 		/*(skip entry invalidation and FAMCancelMonitor())*/
396 		splay_tree *node = scf->dirs;
397 		fam_dir_entry_free((fam_dir_entry *)node->data);
398 		scf->dirs = splaytree_delete(scf->dirs, node->key);
399 	}
400 
401 	if (-1 != scf->fd) {
402 		/*scf->fdn already cleaned up in fdevent_free()*/
403 		FAMClose(&scf->fam);
404 		/*scf->fd = -1;*/
405 	}
406 
407 	free(scf);
408 }
409 
410 static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
411 {
412     if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
413     const int fn_is_dir = S_ISDIR(st->st_mode);
414     /*force_assert(0 != dirlen);*/
415     /*force_assert(fn[0] == '/');*/
416     /* consistency: ensure fn does not end in '/' unless root "/"
417      * FAM events will not end in '/', so easier to match this way */
418     if (fn[dirlen-1] == '/') --dirlen;
419     if (0 == dirlen) dirlen = 1; /* root dir ("/") */
420     /* Note: paths are expected to be normalized before calling stat_cache,
421      * e.g. without repeated '/' */
422     if (!fn_is_dir) {
423         while (fn[--dirlen] != '/') ;
424         if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
425     }
426     int dir_ndx = splaytree_djbhash(fn, dirlen);
427     fam_dir_entry *fam_dir = NULL;
428 
429     scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
430     if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
431         fam_dir = scf->dirs->data;
432         if (!buffer_is_equal_string(fam_dir->name, fn, dirlen)) {
433             /* hash collision; preserve existing
434              * do not monitor new to avoid cache thrashing */
435             return NULL;
436         }
437         /* directory already registered */
438     }
439 
440     const time_t cur_ts = log_epoch_secs;
441     struct stat lst;
442     int ck_dir = fn_is_dir;
443     if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
444         ck_dir = 1;
445         /*(temporarily modify fn)*/
446         fn[dirlen] = '\0';
447         if (0 != lstat(fn, &lst)) {
448             fn[dirlen] = '/';
449             return NULL;
450         }
451         if (!S_ISLNK(lst.st_mode)) {
452             st = &lst;
453         }
454         else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
455             fn[dirlen] = '/';
456             return NULL;
457         }
458         fn[dirlen] = '/';
459     }
460 
461     int ck_lnk = (NULL == fam_dir);
462     if (ck_dir && NULL != fam_dir) {
463         /* check stat() matches device and inode, just in case an external event
464          * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
465         if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
466             ck_lnk = 1;
467             /*(modifies scf->dirs but no need to re-splay for dir_ndx since
468              * fam_dir is not NULL and so splaytree_insert not called below)*/
469             if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
470             if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
471                 stat_cache_update_entry(fn, dirlen, st, NULL);
472             /*(must not delete tree since caller is holding a valid node)*/
473             stat_cache_invalidate_dir_tree(fn, dirlen);
474             if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
475                 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name->ptr,
476                                             &fam_dir->req,
477                                             (void *)(intptr_t)dir_ndx)) {
478                 fam_dir->stat_ts = 0; /* invalidate */
479                 return NULL;
480             }
481             fam_dir->st_dev = st->st_dev;
482             fam_dir->st_ino = st->st_ino;
483         }
484         fam_dir->stat_ts = cur_ts;
485     }
486 
487     if (NULL == fam_dir) {
488         fam_dir = fam_dir_entry_init(fn, dirlen);
489 
490         if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name->ptr,&fam_dir->req,
491                                      (void *)(intptr_t)dir_ndx)) {
492             log_error(scf->errh, __FILE__, __LINE__,
493               "monitoring dir failed: %s file: %s %s",
494               fam_dir->name->ptr, fn, FamErrlist[FAMErrno]);
495             fam_dir_entry_free(fam_dir);
496             return NULL;
497         }
498 
499         scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
500         fam_dir->stat_ts= cur_ts;
501         fam_dir->st_dev = st->st_dev;
502         fam_dir->st_ino = st->st_ino;
503     }
504 
505     if (ck_lnk) {
506         if (fn_is_dir) {
507             /*(temporarily modify fn)*/
508             char e = fn[dirlen];
509             fn[dirlen] = '\0';
510             if (0 != lstat(fn, &lst)) {
511                 fn[dirlen] = e;
512                 return NULL;
513             }
514             fn[dirlen] = e;
515         }
516         if (fam_dir->fam_parent) {
517             --fam_dir->fam_parent->refcnt;
518             fam_dir->fam_parent = NULL;
519         }
520         if (S_ISLNK(lst.st_mode)) {
521             fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
522         }
523     }
524 
525     ++fam_dir->refcnt;
526     return fam_dir;
527 }
528 
529 #endif
530 
531 
532 static stat_cache_entry * stat_cache_entry_init(void) {
533     stat_cache_entry *sce = calloc(1, sizeof(*sce));
534     force_assert(NULL != sce);
535     return sce;
536 }
537 
538 static void stat_cache_entry_free(void *data) {
539     stat_cache_entry *sce = data;
540     if (!sce) return;
541 
542   #ifdef HAVE_FAM_H
543     /*(decrement refcnt only;
544      * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
545     if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
546   #endif
547 
548     free(sce->name.ptr);
549     free(sce->etag.ptr);
550     if (sce->content_type.size) free(sce->content_type.ptr);
551 
552     free(sce);
553 }
554 
555 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
556 
557 static const char *attrname = "Content-Type";
558 static char attrval[128];
559 static buffer attrb = { attrval, 0, 0 };
560 
561 static int stat_cache_attr_get(const char *name) {
562   #if defined(HAVE_XATTR)
563    #if defined(HAVE_SYS_XATTR_H)
564     ssize_t attrlen;
565     if (0 < (attrlen = getxattr(name, attrname,
566                                 attrval, sizeof(attrval)-1)))
567    #else
568     int attrlen = sizeof(attrval)-1;
569     if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
570    #endif
571   #elif defined(HAVE_EXTATTR)
572     ssize_t attrlen;
573     if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
574                                         attrval, sizeof(attrval)-1)))
575   #endif
576     {
577         attrval[attrlen] = '\0';
578         attrb.used = (uint32_t)(attrlen + 1);
579         return 1;
580     }
581     return 0;
582 }
583 
584 #endif
585 
586 int stat_cache_init(fdevents *ev, log_error_st *errh) {
587   #ifdef HAVE_FAM_H
588     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
589         sc.scf = stat_cache_init_fam(ev, errh);
590         if (NULL == sc.scf) return 0;
591     }
592   #else
593     UNUSED(ev);
594     UNUSED(errh);
595   #endif
596 
597     return 1;
598 }
599 
600 void stat_cache_free(void) {
601     splay_tree *sptree = sc.files;
602     while (sptree) {
603         stat_cache_entry_free(sptree->data);
604         sptree = splaytree_delete(sptree, sptree->key);
605     }
606     sc.files = NULL;
607 
608   #ifdef HAVE_FAM_H
609     stat_cache_free_fam(sc.scf);
610     sc.scf = NULL;
611   #endif
612 
613   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
614     attrname = "Content-Type";
615   #endif
616 
617     sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
618 }
619 
620 void stat_cache_xattrname (const char *name) {
621   #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
622     attrname = name;
623   #else
624     UNUSED(name);
625   #endif
626 }
627 
628 int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
629     if (buffer_string_is_empty(stat_cache_string))
630         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
631     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
632         sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
633 #ifdef HAVE_FAM_H
634     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
635         sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
636 #endif
637     else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable")))
638         sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
639     else {
640         log_error(errh, __FILE__, __LINE__,
641           "server.stat-cache-engine can be one of \"disable\", \"simple\","
642 #ifdef HAVE_FAM_H
643           " \"fam\","
644 #endif
645           " but not: %s", stat_cache_string->ptr);
646         return -1;
647     }
648     return 0;
649 }
650 
651 const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
652 {
653     const char * const end = name + nlen; /*(end of string)*/
654     const uint32_t used = mimetypes->used;
655     if (used < 16) {
656         for (uint32_t i = 0; i < used; ++i) {
657             /* suffix match */
658             const data_string *ds = (data_string *)mimetypes->data[i];
659             const size_t klen = buffer_string_length(&ds->key);
660             if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
661                 return &ds->value;
662         }
663     }
664     else {
665         const char *s;
666         const data_string *ds;
667         if (nlen) {
668             for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
669             if (*s == '/') ++s;
670         }
671         else {
672             s = name;
673         }
674         /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
675         ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
676         if (NULL != ds) return &ds->value;
677         while (++s < end) {
678             while (*s != '.' && ++s != end) ;
679             if (s == end) break;
680             /* search ".ext" then "ext" */
681             ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
682             if (NULL != ds) return &ds->value;
683             /* repeat search without leading '.' to handle situation where
684              * admin configured mimetype.assign keys without leading '.' */
685             if (++s < end) {
686                 if (*s == '.') { --s; continue; }
687                 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
688                 if (NULL != ds) return &ds->value;
689             }
690         }
691         /* search for ""; catchall */
692         ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
693         if (NULL != ds) return &ds->value;
694     }
695 
696     return NULL;
697 }
698 
699 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
700 
701 const buffer * stat_cache_mimetype_by_xattr(const char * const name)
702 {
703     return stat_cache_attr_get(name) ? &attrb : NULL;
704 }
705 
706 const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
707 {
708     /*(invalid caching if user config has multiple, different
709      * r->conf.mimetypes for same extension (not expected))*/
710     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
711 
712     if (!S_ISREG(sce->st.st_mode)) return NULL;
713 
714     /* cache mimetype */
715     const buffer *mtype =
716       (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
717     if (NULL == mtype)
718         mtype = stat_cache_mimetype_by_ext(mimetypes,CONST_BUF_LEN(&sce->name));
719     if (NULL != mtype) {
720         if (sce->content_type.size) {
721             buffer_copy_buffer(&sce->content_type, mtype);
722         }
723         else if (mtype == &attrb) {
724             sce->content_type.ptr = NULL;
725             buffer_copy_buffer(&sce->content_type, mtype);
726         }
727         else {
728             /*(copy pointers from mimetypes array; avoid allocation)*/
729             sce->content_type.ptr = mtype->ptr;
730             sce->content_type.used = mtype->used;
731             /*(leave sce->content_type.size = 0 to flag not-allocated)*/
732         }
733     }
734     else
735         buffer_clear(&sce->content_type);
736 
737     return &sce->content_type;
738 }
739 
740 #else
741 
742 const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
743 {
744     /*(invalid caching if user config has multiple, different
745      * r->conf.mimetypes for same extension (not expected))*/
746     if (!buffer_string_is_empty(&sce->content_type)) return &sce->content_type;
747 
748     if (!S_ISREG(sce->st.st_mode)) return NULL;
749 
750     /* cache mimetype */
751     const buffer * const mtype =
752       stat_cache_mimetype_by_ext(mimetypes, CONST_BUF_LEN(&sce->name));
753     if (NULL != mtype) {
754         /*(copy pointers from mimetypes array; avoid allocation)*/
755         sce->content_type.ptr = mtype->ptr;
756         sce->content_type.used = mtype->used;
757         /*(leave sce->content_type.size = 0 to flag not-allocated)*/
758     }
759     else
760         buffer_clear(&sce->content_type);
761 
762     return &sce->content_type;
763 }
764 
765 #endif
766 
767 const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
768     /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
769      * for same path (not expected, since etag flags should be by filesystem))*/
770     if (!buffer_string_is_empty(&sce->etag)) return &sce->etag;
771 
772     if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
773         if (0 == flags) return NULL;
774         etag_create(&sce->etag, &sce->st, flags);
775         return &sce->etag;
776     }
777 
778     return NULL;
779 }
780 
781 void stat_cache_update_entry(const char *name, uint32_t len,
782                              struct stat *st, buffer *etagb)
783 {
784     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
785     force_assert(0 != len);
786     if (name[len-1] == '/') { if (0 == --len) len = 1; }
787     splay_tree **sptree = &sc.files;
788     stat_cache_entry *sce =
789       stat_cache_sptree_find(sptree, name, len);
790     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
791         sce->stat_ts = log_epoch_secs;
792         sce->st = *st; /* etagb might be NULL to clear etag (invalidate) */
793         buffer_copy_string_len(&sce->etag, CONST_BUF_LEN(etagb));
794       #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
795         buffer_clear(&sce->content_type);
796       #endif
797     }
798 }
799 
800 void stat_cache_delete_entry(const char *name, uint32_t len)
801 {
802     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
803     force_assert(0 != len);
804     if (name[len-1] == '/') { if (0 == --len) len = 1; }
805     splay_tree **sptree = &sc.files;
806     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
807     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
808         stat_cache_entry_free(sce);
809         *sptree = splaytree_delete(*sptree, (*sptree)->key);
810     }
811 }
812 
813 void stat_cache_invalidate_entry(const char *name, uint32_t len)
814 {
815     splay_tree **sptree = &sc.files;
816     stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
817     if (sce && buffer_is_equal_string(&sce->name, name, len)) {
818         sce->stat_ts = 0;
819       #ifdef HAVE_FAM_H
820         if (sce->fam_dir != NULL) {
821             --((fam_dir_entry *)sce->fam_dir)->refcnt;
822             sce->fam_dir = NULL;
823         }
824       #endif
825     }
826 }
827 
828 #ifdef HAVE_FAM_H
829 
830 static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
831                                                 const char *name, size_t len)
832 {
833     if (t->left)  stat_cache_invalidate_dir_tree_walk(t->left,  name, len);
834     if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
835 
836     buffer *b = &((stat_cache_entry *)t->data)->name;
837     size_t blen = buffer_string_length(b);
838     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
839         stat_cache_entry *sce = t->data;
840         sce->stat_ts = 0;
841         if (sce->fam_dir != NULL) {
842             --((fam_dir_entry *)sce->fam_dir)->refcnt;
843             sce->fam_dir = NULL;
844         }
845     }
846 }
847 
848 static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
849 {
850     splay_tree * const sptree = sc.files;
851     if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
852 }
853 
854 #endif
855 
856 /*
857  * walk though splay_tree and collect contents of dir tree.
858  * remove tagged entries in a second loop
859  */
860 
861 static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
862                                     int *keys, int *ndx)
863 {
864     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
865     if (t->left)  stat_cache_tag_dir_tree(t->left,  name, len, keys, ndx);
866     if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
867     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
868 
869     buffer *b = &((stat_cache_entry *)t->data)->name;
870     size_t blen = buffer_string_length(b);
871     if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
872         keys[(*ndx)++] = t->key;
873 }
874 
875 __attribute_noinline__
876 static void stat_cache_prune_dir_tree(const char *name, size_t len)
877 {
878     splay_tree *sptree = sc.files;
879     int max_ndx, i;
880     int keys[8192]; /* 32k size on stack */
881     do {
882         if (!sptree) break;
883         max_ndx = 0;
884         stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
885         for (i = 0; i < max_ndx; ++i) {
886             const int ndx = keys[i];
887             splay_tree *node = sptree = splaytree_splay(sptree, ndx);
888             if (node && node->key == ndx) {
889                 stat_cache_entry_free(node->data);
890                 sptree = splaytree_delete(sptree, ndx);
891             }
892         }
893     } while (max_ndx == sizeof(keys)/sizeof(int));
894     sc.files = sptree;
895 }
896 
897 static void stat_cache_delete_tree(const char *name, uint32_t len)
898 {
899     stat_cache_delete_entry(name, len);
900     stat_cache_prune_dir_tree(name, len);
901 }
902 
903 void stat_cache_delete_dir(const char *name, uint32_t len)
904 {
905     force_assert(0 != len);
906     if (name[len-1] == '/') { if (0 == --len) len = 1; }
907     stat_cache_delete_tree(name, len);
908   #ifdef HAVE_FAM_H
909     if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
910         splay_tree **sptree = &sc.scf->dirs;
911         fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
912         if (fam_dir && buffer_is_equal_string(fam_dir->name, name, len))
913             fam_dir_invalidate_node(fam_dir);
914         if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
915         fam_dir_periodic_cleanup();
916     }
917   #endif
918 }
919 
920 /***
921  *
922  *
923  *
924  * returns:
925  *  - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
926  *  - HANDLER_ERROR on stat() failed -> see errno for problem
927  */
928 
929 stat_cache_entry * stat_cache_get_entry(const buffer *name) {
930 	stat_cache_entry *sce = NULL;
931 	struct stat st;
932 	int file_ndx;
933 
934 	/* consistency: ensure lookup name does not end in '/' unless root "/"
935 	 * (but use full path given with stat(), even with trailing '/') */
936 	int final_slash = 0;
937 	size_t len = buffer_string_length(name);
938 	force_assert(0 != len);
939 	if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
940 	/* Note: paths are expected to be normalized before calling stat_cache,
941 	 * e.g. without repeated '/' */
942 
943 	if (name->ptr[0] != '/') {
944 		errno = EINVAL;
945 		return NULL;
946 	}
947 
948 	/*
949 	 * check if the directory for this file has changed
950 	 */
951 
952 	const time_t cur_ts = log_epoch_secs;
953 
954 	file_ndx = splaytree_djbhash(name->ptr, len);
955 	splay_tree * const sptree = sc.files = splaytree_splay(sc.files, file_ndx);
956 
957 	if (sptree && (sptree->key == file_ndx)) {
958 		/* we have seen this file already and
959 		 * don't stat() it again in the same second */
960 
961 		sce = sptree->data;
962 
963 		/* check if the name is the same, we might have a collision */
964 
965 		if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
966 			if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
967 				if (sce->stat_ts == cur_ts) {
968 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
969 						errno = ENOTDIR;
970 						return NULL;
971 					}
972 					return sce;
973 				}
974 			}
975 		      #ifdef HAVE_FAM_H
976 			else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
977 				 && sce->fam_dir) { /* entry is in monitored dir */
978 				/* re-stat() periodically, even if monitoring for changes
979 				 * (due to limitations in stat_cache.c use of FAM)
980 				 * (gaps due to not continually monitoring an entire tree) */
981 				if (cur_ts - sce->stat_ts < 16) {
982 					if (final_slash && !S_ISDIR(sce->st.st_mode)) {
983 						errno = ENOTDIR;
984 						return NULL;
985 					}
986 					return sce;
987 				}
988 			}
989 		      #endif
990 		} else {
991 			/* collision, forget about the entry */
992 			sce = NULL;
993 		}
994 	}
995 
996 	if (-1 == stat(name->ptr, &st)) {
997 		return NULL;
998 	}
999 
1000 	if (S_ISREG(st.st_mode)) {
1001 		/* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1002 		if (name->ptr[buffer_string_length(name) - 1] == '/') {
1003 			errno = ENOTDIR;
1004 			return NULL;
1005 		}
1006 	}
1007 
1008 	if (NULL == sce) {
1009 
1010 		sce = stat_cache_entry_init();
1011 		buffer_copy_string_len(&sce->name, name->ptr, len);
1012 
1013 		/* already splayed file_ndx */
1014 		if (NULL != sptree && sptree->key == file_ndx) {
1015 			/* hash collision: replace old entry */
1016 			stat_cache_entry_free(sptree->data);
1017 			sptree->data = sce;
1018 		} else {
1019 			sc.files = splaytree_insert(sptree, file_ndx, sce);
1020 		}
1021 
1022 	} else {
1023 
1024 		buffer_clear(&sce->etag);
1025 	      #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1026 		buffer_clear(&sce->content_type);
1027 	      #endif
1028 
1029 	}
1030 
1031 	sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1032 
1033 #ifdef HAVE_FAM_H
1034 	if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1035 		if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1036 		sce->fam_dir =
1037 		  fam_dir_monitor(sc.scf, CONST_BUF_LEN(name), &st);
1038 	      #if 0 /*(performed below)*/
1039 		if (NULL != sce->fam_dir) {
1040 			/*(may have been invalidated by dir change)*/
1041 			sce->stat_ts = cur_ts;
1042 		}
1043 	      #endif
1044 	}
1045 #endif
1046 
1047 	sce->stat_ts = cur_ts;
1048 	return sce;
1049 }
1050 
1051 int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1052     /* caller should check for symlinks only if we should block symlinks. */
1053 
1054     /* catch the obvious symlinks
1055      *
1056      * this is not a secure check as we still have a race-condition between
1057      * the stat() and the open. We can only solve this by
1058      * 1. open() the file
1059      * 2. fstat() the fd
1060      *
1061      * and keeping the file open for the rest of the time. But this can
1062      * only be done at network level.
1063      * */
1064 
1065   #ifdef HAVE_LSTAT
1066     /* we assume "/" can not be symlink,
1067      * so skip the symlink stuff if path is "/" */
1068     size_t len = buffer_string_length(name);
1069     force_assert(0 != len);
1070     force_assert(name->ptr[0] == '/');
1071     if (1 == len) return 0;
1072    #ifndef PATH_MAX
1073    #define PATH_MAX 4096
1074    #endif
1075     if (len >= PATH_MAX) return -1;
1076 
1077     char buf[PATH_MAX];
1078     memcpy(buf, name->ptr, len);
1079     char *s_cur = buf+len;
1080     do {
1081         *s_cur = '\0';
1082         struct stat st;
1083         if (0 == lstat(buf, &st)) {
1084             if (S_ISLNK(st.st_mode)) return 1;
1085         }
1086         else {
1087             log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1088             return -1;
1089         }
1090     } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1091   #endif
1092 
1093     return 0;
1094 }
1095 
1096 int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1097 	/*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1098 	 * not any intermediate symlinks along the path)*/
1099 	const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1100 	if (fd >= 0) {
1101 		if (0 == fstat(fd, st)) {
1102 			return fd;
1103 		} else {
1104 			close(fd);
1105 		}
1106 	}
1107 	return -1;
1108 }
1109 
1110 /**
1111  * remove stat() from cache which haven't been stat()ed for
1112  * more than 2 seconds
1113  *
1114  *
1115  * walk though the stat-cache, collect the ids which are too old
1116  * and remove them in a second loop
1117  */
1118 
1119 static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const time_t cur_ts) {
1120     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1121     if (t->left)
1122         stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1123     if (t->right)
1124         stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1125     if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1126 
1127     const stat_cache_entry * const sce = t->data;
1128     if (cur_ts - sce->stat_ts > max_age)
1129         keys[(*ndx)++] = t->key;
1130 }
1131 
1132 static void stat_cache_periodic_cleanup(const time_t max_age, const time_t cur_ts) {
1133     splay_tree *sptree = sc.files;
1134     int max_ndx, i;
1135     int keys[8192]; /* 32k size on stack */
1136     do {
1137         if (!sptree) break;
1138         max_ndx = 0;
1139         stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1140         for (i = 0; i < max_ndx; ++i) {
1141             int ndx = keys[i];
1142             sptree = splaytree_splay(sptree, ndx);
1143             if (sptree && sptree->key == ndx) {
1144                 stat_cache_entry_free(sptree->data);
1145                 sptree = splaytree_delete(sptree, ndx);
1146             }
1147         }
1148     } while (max_ndx == sizeof(keys)/sizeof(int));
1149     sc.files = sptree;
1150 }
1151 
1152 void stat_cache_trigger_cleanup(void) {
1153 	time_t max_age = 2;
1154 
1155       #ifdef HAVE_FAM_H
1156 	if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1157 		if (log_epoch_secs & 0x1F) return;
1158 		/* once every 32 seconds (0x1F == 31) */
1159 		max_age = 32;
1160 		fam_dir_periodic_cleanup();
1161 		/* By doing this before stat_cache_periodic_cleanup(),
1162 		 * entries used within the next max_age secs will remain
1163 		 * monitored, instead of effectively flushing and
1164 		 * rebuilding the FAM monitoring every max_age seconds */
1165 	}
1166       #endif
1167 
1168 	stat_cache_periodic_cleanup(max_age, log_epoch_secs);
1169 }
1170