1 #include "first.h"
2
3 #include "stat_cache.h"
4 #include "log.h"
5 #include "fdevent.h"
6 #include "http_etag.h"
7 #include "algo_splaytree.h"
8
9 #include <sys/types.h>
10 #include <sys/stat.h>
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17
18 #if defined(HAVE_SYS_XATTR_H)
19 # include <sys/xattr.h>
20 #elif defined(HAVE_ATTR_ATTRIBUTES_H)
21 # include <attr/attributes.h>
22 #endif
23
24 #ifdef HAVE_SYS_EXTATTR_H
25 # include <sys/extattr.h>
26 #endif
27
28 #ifndef HAVE_LSTAT
29 #define lstat stat
30 #ifndef S_ISLNK
31 #define S_ISLNK(mode) (0)
32 #endif
33 #endif
34
35 /*
36 * stat-cache
37 *
 * - a splay tree is used to take advantage of its caching effect:
 *   recently accessed entries are rotated toward the root
39 */
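/* (illustrative sketch of the lookup pattern used throughout this file;
 *  hashes can collide, so callers re-check the full path after the splay:
 *    ndx  = splaytree_djbhash(name, len);     hash path -> int key
 *    tree = splaytree_splay(tree, ndx);       rotate key toward the root
 *    if (tree && tree->key == ndx) ...        then compare the full name )*/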
40
41 enum {
42 STAT_CACHE_ENGINE_SIMPLE = 0 /*(default)*/
43 ,STAT_CACHE_ENGINE_NONE = 1
44 ,STAT_CACHE_ENGINE_FAM = 2 /* same as STAT_CACHE_ENGINE_INOTIFY */
45 ,STAT_CACHE_ENGINE_INOTIFY = 2 /* same as STAT_CACHE_ENGINE_FAM */
46 ,STAT_CACHE_ENGINE_KQUEUE = 2 /* same as STAT_CACHE_ENGINE_FAM */
47 };
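/* (stat_cache_choose_engine() below maps the server.stat-cache-engine
 *  config strings "simple" (default), "fam"/"inotify"/"kqueue" (where
 *  available) and "disable"/"none" onto these values) */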
48
49 struct stat_cache_fam; /* declaration */
50
51 typedef struct stat_cache {
52 int stat_cache_engine;
53 splay_tree *files; /* nodes of tree are (stat_cache_entry *) */
54 struct stat_cache_fam *scf;
55 } stat_cache;
56
57 static stat_cache sc;
58
59
static void * stat_cache_sptree_find(splay_tree ** const sptree,
61 const char * const name,
62 uint32_t len)
63 {
64 const int ndx = splaytree_djbhash(name, len);
65 *sptree = splaytree_splay(*sptree, ndx);
66 return (*sptree && (*sptree)->key == ndx) ? (*sptree)->data : NULL;
67 }
68
69
70 #if defined(HAVE_SYS_INOTIFY_H) \
71 || (defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
72 #ifndef HAVE_FAM_H
73 #define HAVE_FAM_H
74 #endif
75 #endif
76
77 #ifdef HAVE_FAM_H
78
79 /* monitor changes in directories using FAM
80 *
 * This FAM-based implementation monitors directories as they are used,
82 * and maintains a reference count for cache use within stat_cache.c.
83 * A periodic job runs in lighttpd every 32 seconds, expiring entries unused
 * in the last 64 seconds out of the cache and cancelling FAM monitoring. Items
85 * within the cache are checked against the filesystem upon use if last stat()
86 * was greater than or equal to 16 seconds ago.
87 *
88 * This implementation does not monitor every directory in a tree, and therefore
89 * the cache may get out-of-sync with the filesystem. Delays in receiving and
90 * processing events from FAM might also lead to stale cache entries.
91 *
92 * For many websites, a large number of files are seldom, if ever, modified,
93 * and a common practice with images is to create a new file with a new name
94 * when a new version is needed, in order for client browsers and CDNs to better
 * cache the content. Given this, most sites will see little difference in
96 * performance between server.stat-cache-engine = "fam" and "simple" (default).
97 * The default server.stat-cache-engine = "simple" calls stat() on a target once
98 * per second, and reuses that information until the next second. For use where
99 * changes must be immediately visible, server.stat-cache-engine = "disable"
100 * should be used.
101 *
102 * When considering use of server.stat-cache-engine = "fam", there are a few
103 * additional limitations for this cache implementation using FAM.
104 * - symlinks to files located outside of the current directory do not result
105 * in changes to that file being monitored (unless that file is in a directory
106 * which is monitored as a result of a different request). symlinks can be
107 * chained and can be circular. This implementation *does not* readlink() or
108 * realpath() to resolve the chains to find and monitor the ultimate target
109 * directory. While symlinks to files located outside the current directory
110 * are not monitored, symlinks to directories *are* monitored, though chains
111 * of symlinks to directories do not result in monitoring of the directories
112 * containing intermediate symlinks to the target directory.
113 * - directory rename of a directory which is not currently being monitored will
114 * result in stale information in the cache if there is a subdirectory that is
115 * being monitored.
116 * Even though lighttpd will not receive FAM events in the above cases, lighttpd
117 * does re-validate the information in the cache upon use if the cache entry has
118 * not been checked in 16 seconds, so that is the upper limit for use of stale
119 * data.
120 *
121 * Use of server.stat-cache-engine = "fam" is discouraged for extremely volatile
122 * directories such as temporary directories (e.g. /tmp and maybe /var/tmp) due
123 * to the overhead of processing the additional noise generated from changes.
124 * Related, server.stat-cache-engine = "fam" is not recommended on trees of
125 * untrusted files where a malicious user could generate an excess of change
126 * events.
127 *
128 * Internal note: lighttpd walks the caches to prune trees in stat_cache when an
129 * event is received for a directory (or symlink to a directory) which has been
130 * deleted or renamed. The splaytree data structure is suboptimal for frequent
 * changes of large directory trees where there have been a large number of
132 * different files recently accessed and part of the stat_cache.
133 */
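/* (example lighttpd.conf settings discussed above -- illustrative only:
 *    server.stat-cache-engine = "simple"    # default; stat() at most 1x/sec
 *    server.stat-cache-engine = "fam"       # or "inotify"/"kqueue" if built
 *    server.stat-cache-engine = "disable"   # when changes must be visible
 *                                           # immediately
 * )*/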
134
135 #if defined(HAVE_SYS_INOTIFY_H) \
136 && !(defined(HAVE_SYS_EVENT_H) && defined(HAVE_KQUEUE))
137
138 #include <sys/inotify.h>
139 #ifndef IN_EXCL_UNLINK /*(not defined in some very old glibc headers)*/
140 #define IN_EXCL_UNLINK 0x04000000
141 #endif
142
143 /*(translate FAM API to inotify; this is specific to stat_cache.c use of FAM)*/
144 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
145 typedef int FAMRequest; /*(fr)*/
146 #define FAMClose(fd) \
147 close(*(fd))
148 #define FAMCancelMonitor(fd, wd) \
149 inotify_rm_watch(*(fd), *(wd))
150 #define fam_watch_mask ( IN_ATTRIB | IN_CREATE | IN_DELETE | IN_DELETE_SELF \
151 | IN_MODIFY | IN_MOVE_SELF | IN_MOVED_FROM \
152 | IN_EXCL_UNLINK | IN_ONLYDIR )
153 /*(note: follows symlinks; not providing IN_DONT_FOLLOW)*/
154 #define FAMMonitorDirectory(fd, fn, wd, userData) \
155 ((*(wd) = inotify_add_watch(*(fd), (fn), (fam_watch_mask))) < 0)
156 typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
157 FAMChanged=1,
158 FAMDeleted=2,
159 FAMCreated=5,
160 FAMMoved=6,
161 } FAMCodes;
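/* (with the macros above, the FAM-oriented code below compiles unchanged
 *  against inotify: FAMMonitorDirectory() becomes inotify_add_watch() and
 *  the returned watch descriptor is stored where a FAMRequest would be) */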
162
163 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
164 #undef HAVE_SYS_INOTIFY_H
165
166 #include <sys/event.h>
167 #include <sys/time.h>
168
/*(translate FAM API to kqueue; this is specific to stat_cache.c use of FAM)*/
170 #define fam fd /*(translate struct stat_cache_fam scf->fam -> scf->fd)*/
171 typedef int FAMRequest; /*(fr)*/
172 #define FAMClose(fd) \
173 (-1 != (*(fd)) ? close(*(fd)) : 0)
static int FAMCancelMonitor (const int * const fd, int * const wd)
175 {
176 if (-1 == *fd) return 0;
177 if (-1 == *wd) return 0;
178 struct timespec t0 = { 0, 0 };
179 struct kevent kev;
180 EV_SET(&kev, *wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
181 int rc = kevent(*fd, &kev, 1, NULL, 0, &t0);
182 close(*wd);
183 *wd = -1;
184 return rc;
185 }
static int FAMMonitorDirectory (int * const fd, char * const fn, int * const wd, void * const userData)
187 {
188 *wd = fdevent_open_dirname(fn, 1); /*(note: follows symlinks)*/
189 if (-1 == *wd) return -1;
190 struct timespec t0 = { 0, 0 };
191 struct kevent kev;
192 unsigned short kev_flags = EV_ADD | EV_ENABLE | EV_CLEAR;
193 unsigned int kev_fflags = NOTE_ATTRIB | NOTE_EXTEND | NOTE_LINK | NOTE_WRITE
194 | NOTE_DELETE | NOTE_REVOKE | NOTE_RENAME;
195 EV_SET(&kev, *wd, EVFILT_VNODE, kev_flags, kev_fflags, 0, userData);
196 return kevent(*fd, &kev, 1, NULL, 0, &t0);
197 }
198 typedef enum FAMCodes { /*(copied from fam.h to define arbitrary enum values)*/
199 FAMChanged=1,
200 FAMDeleted=2,
201 FAMCreated=5,
202 FAMMoved=6,
203 } FAMCodes;
204
205 #else
206
207 #include <fam.h>
208
209 #ifdef HAVE_FAMNOEXISTS
210 #ifndef LIGHTTPD_STATIC
211 #ifdef HAVE_DLFCN_H
212 #include <dlfcn.h>
213 #endif
214 #endif
215 #endif
216
217 #endif
218
219 typedef struct fam_dir_entry {
220 buffer name;
221 int refcnt;
222 FAMRequest req;
223 unix_time64_t stat_ts;
224 dev_t st_dev;
225 ino_t st_ino;
226 struct fam_dir_entry *fam_parent;
227 } fam_dir_entry;
228
229 typedef struct stat_cache_fam {
230 splay_tree *dirs; /* indexed by path; node data is fam_dir_entry */
231 #ifdef HAVE_SYS_INOTIFY_H
232 splay_tree *wds; /* indexed by inotify watch descriptor */
233 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
234 #else
235 FAMConnection fam;
236 #endif
237 log_error_st *errh;
238 fdevents *ev;
239 fdnode *fdn;
240 int fd;
241 } stat_cache_fam;
242
243 __attribute_returns_nonnull__
static fam_dir_entry * fam_dir_entry_init(const char *name, size_t len)
245 {
246 fam_dir_entry * const fam_dir = ck_calloc(1, sizeof(*fam_dir));
247 buffer_copy_string_len(&fam_dir->name, name, len);
248 fam_dir->refcnt = 0;
249 #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
250 fam_dir->req = -1;
251 #endif
252
253 return fam_dir;
254 }
255
static void fam_dir_entry_free(fam_dir_entry *fam_dir)
257 {
258 if (!fam_dir) return;
259 /*(fam_dir->fam_parent might be invalid pointer here; ignore)*/
260 free(fam_dir->name.ptr);
261 #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
262 if (-1 != fam_dir->req)
263 close(fam_dir->req);
264 #endif
265 free(fam_dir);
266 }
267
static void fam_dir_invalidate_node(fam_dir_entry *fam_dir)
269 {
270 fam_dir->stat_ts = 0;
271 if (fam_dir->fam_parent) {
272 --fam_dir->fam_parent->refcnt;
273 fam_dir->fam_parent = NULL;
274 }
275 }
276
277 /*
 * walk through splay_tree and collect contents of dir tree.
279 * remove tagged entries in a second loop
280 */
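/* (two passes are used because splaytree_splay()/splaytree_delete()
 *  restructure the tree, which would invalidate an in-progress walk) */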
281
static void fam_dir_tag_refcnt(splay_tree *t, int *keys, int *ndx)
283 {
284 if (*ndx == 512) return; /*(must match num array entries in keys[])*/
285 if (t->left) fam_dir_tag_refcnt(t->left, keys, ndx);
286 if (t->right) fam_dir_tag_refcnt(t->right, keys, ndx);
287 if (*ndx == 512) return; /*(must match num array entries in keys[])*/
288
289 fam_dir_entry * const fam_dir = t->data;
290 if (0 == fam_dir->refcnt) {
291 fam_dir_invalidate_node(fam_dir);
292 keys[(*ndx)++] = t->key;
293 }
294 }
295
296 __attribute_noinline__
static void fam_dir_periodic_cleanup(void) {
298 stat_cache_fam * const scf = sc.scf;
299 int max_ndx, i;
300 int keys[512]; /* 2k size on stack */
301 #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
302 struct kevent kevl[512]; /* 32k size on stack to batch kevent EV_DELETE */
303 #endif
304 do {
305 if (!scf->dirs) break;
306 max_ndx = 0;
307 fam_dir_tag_refcnt(scf->dirs, keys, &max_ndx);
308 for (i = 0; i < max_ndx; ++i) {
309 const int ndx = keys[i];
310 splay_tree *node = scf->dirs = splaytree_splay(scf->dirs, ndx);
311 if (node && node->key == ndx) {
312 fam_dir_entry *fam_dir = node->data;
313 scf->dirs = splaytree_delete(scf->dirs, ndx);
314 #ifdef HAVE_SYS_INOTIFY_H
315 scf->wds = splaytree_delete(scf->wds, fam_dir->req);
316 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
317 /* batch process kevent removal; defer cancel */
318 EV_SET(kevl+i, fam_dir->req, EVFILT_VNODE, EV_DELETE, 0, 0, 0);
319 fam_dir->req = -1; /*(make FAMCancelMonitor() a no-op)*/
320 #endif
321 FAMCancelMonitor(&scf->fam, &fam_dir->req);
322 fam_dir_entry_free(fam_dir);
323 }
324 }
325 #if defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
326 /* batch process: kevent() to submit EV_DELETE, then close dir fds */
327 if (0 == max_ndx) break;
328 struct timespec t0 = { 0, 0 };
329 kevent(scf->fd, kevl, max_ndx, NULL, 0, &t0);
330 for (i = 0; i < max_ndx; ++i)
331 close((int)kevl[i].ident);
332 #endif
333 } while (max_ndx == sizeof(keys)/sizeof(int));
334 }
335
static void fam_dir_invalidate_tree(splay_tree *t, const char *name, size_t len)
337 {
338 #ifdef __clang_analyzer__
339 force_assert(name);
340 #endif
341 /*force_assert(t);*/
342 if (t->left) fam_dir_invalidate_tree(t->left, name, len);
343 if (t->right) fam_dir_invalidate_tree(t->right, name, len);
344
345 fam_dir_entry * const fam_dir = t->data;
346 #ifdef __clang_analyzer__
347 force_assert(fam_dir);
348 #endif
349 const buffer * const b = &fam_dir->name;
350 size_t blen = buffer_clen(b);
351 if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
352 fam_dir_invalidate_node(fam_dir);
353 }
354
355 /* declarations */
356 static void stat_cache_delete_tree(const char *name, uint32_t len);
357 static void stat_cache_invalidate_dir_tree(const char *name, size_t len);
358 static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry * const fam_dir, const char * const fn, const uint32_t fnlen, int code);
359
static void stat_cache_handle_fdevent_in(stat_cache_fam *scf)
361 {
362 #ifdef HAVE_SYS_INOTIFY_H
363 /*(inotify pads in->len to align struct following in->name[])*/
364 char buf[4096]
365 __attribute__ ((__aligned__(__alignof__(struct inotify_event))));
366 int rd;
367 do {
368 rd = (int)read(scf->fd, buf, sizeof(buf));
369 if (rd <= 0) {
370 if (-1 == rd && errno != EINTR && errno != EAGAIN) {
371 log_perror(scf->errh, __FILE__, __LINE__, "inotify error");
372 /* TODO: could flush cache, close scf->fd, and re-open inotify*/
373 }
374 break;
375 }
376 for (int i = 0; i < rd; ) {
377 struct inotify_event * const in =
378 (struct inotify_event *)((uintptr_t)buf + i);
379 uint32_t len = in->len;
380 if (len > sizeof(buf)) break; /*(should not happen)*/
381 i += sizeof(struct inotify_event) + len;
382 if (i > rd) break; /*(should not happen (partial record))*/
383 if (in->mask & IN_CREATE)
384 continue; /*(see comment below for FAMCreated)*/
385 if (in->mask & IN_Q_OVERFLOW) {
386 log_error(scf->errh, __FILE__, __LINE__,
387 "inotify queue overflow");
388 continue;
389 }
390 /* ignore events which may have been pending for
391 * paths recently cancelled via FAMCancelMonitor() */
392 scf->wds = splaytree_splay(scf->wds, in->wd);
393 if (!scf->wds || scf->wds->key != in->wd)
394 continue;
395 fam_dir_entry *fam_dir = scf->wds->data;
396 if (NULL == fam_dir) /*(should not happen)*/
397 continue;
398 if (fam_dir->req != in->wd) /*(should not happen)*/
399 continue;
400 /*(specific to use here in stat_cache.c)*/
401 int code = 0;
402 if (in->mask & (IN_ATTRIB | IN_MODIFY))
403 code = FAMChanged;
404 else if (in->mask & (IN_DELETE | IN_DELETE_SELF | IN_UNMOUNT))
405 code = FAMDeleted;
406 else if (in->mask & (IN_MOVE_SELF | IN_MOVED_FROM))
407 code = FAMMoved;
408
409 if (len) {
410 do { --len; } while (len && in->name[len-1] == '\0');
411 }
412 stat_cache_handle_fdevent_fn(scf, fam_dir, in->name, len, code);
413 }
414 } while (rd + sizeof(struct inotify_event) + NAME_MAX + 1 > sizeof(buf));
415 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
416 struct kevent kevl[256];
417 struct timespec t0 = { 0, 0 };
418 int n;
419 do {
420 n = kevent(scf->fd, NULL, 0, kevl, sizeof(kevl)/sizeof(*kevl), &t0);
421 if (n <= 0) break;
422 for (int i = 0; i < n; ++i) {
423 const struct kevent * const kev = kevl+i;
424 /* ignore events which may have been pending for
425 * paths recently cancelled via FAMCancelMonitor() */
426 int ndx = (int)(intptr_t)kev->udata;
427 scf->dirs = splaytree_splay(scf->dirs, ndx);
428 if (!scf->dirs || scf->dirs->key != ndx)
429 continue;
430 fam_dir_entry *fam_dir = scf->dirs->data;
431 if (fam_dir->req != (int)kev->ident)
432 continue;
433 /*(specific to use here in stat_cache.c)*/
434 /* note: stat_cache only monitors on directories,
435 * so events here are only on directories
436 * note: changes are treated as FAMDeleted since
437 * it is unknown which file in dir was changed
438 * This is not efficient, but this stat_cache mechanism also
439 * should not be used on frequently modified directories. */
440 int code = 0;
441 if (kev->fflags & (NOTE_WRITE|NOTE_ATTRIB|NOTE_EXTEND|NOTE_LINK))
442 code = FAMDeleted; /*(not FAMChanged; see comment above)*/
443 else if (kev->fflags & (NOTE_DELETE|NOTE_REVOKE))
444 code = FAMDeleted;
445 else if (kev->fflags & NOTE_RENAME)
446 code = FAMMoved;
447 if (kev->flags & EV_ERROR) /*(not expected; treat as FAMDeleted)*/
448 code = FAMDeleted;
449 stat_cache_handle_fdevent_fn(scf, fam_dir, NULL, 0, code);
450 }
451 } while (n == sizeof(kevl)/sizeof(*kevl));
452 #else
453 for (int i = 0, ndx; i || (i = FAMPending(&scf->fam)) > 0; --i) {
454 FAMEvent fe;
455 if (FAMNextEvent(&scf->fam, &fe) < 0) break;
456
457 /* ignore events which may have been pending for
458 * paths recently cancelled via FAMCancelMonitor() */
459 ndx = (int)(intptr_t)fe.userdata;
460 scf->dirs = splaytree_splay(scf->dirs, ndx);
461 if (!scf->dirs || scf->dirs->key != ndx) {
462 continue;
463 }
464 fam_dir_entry *fam_dir = scf->dirs->data;
465 if (FAMREQUEST_GETREQNUM(&fam_dir->req)
466 != FAMREQUEST_GETREQNUM(&fe.fr)) {
467 continue;
468 }
469
470 uint32_t fnlen = (fe.code != FAMCreated && fe.filename[0] != '/')
471 ? (uint32_t)strlen(fe.filename)
472 : 0;
473 stat_cache_handle_fdevent_fn(scf, fam_dir, fe.filename, fnlen, fe.code);
474 }
475 #endif
476 }
477
static void stat_cache_handle_fdevent_fn(stat_cache_fam * const scf, fam_dir_entry *fam_dir, const char * const fn, const uint32_t fnlen, int code)
479 {
480 if (fnlen) {
481 buffer * const n = &fam_dir->name;
482 fam_dir_entry *fam_link;
483 uint32_t len;
484 switch (code) {
485 case FAMCreated:
486 /* file created in monitored dir modifies dir and
487 * we should get a separate FAMChanged event for dir.
488 * Therefore, ignore file FAMCreated event here.
489 * Also, if FAMNoExists() is used, might get spurious
490 * FAMCreated events as changes are made e.g. in monitored
491 * sub-sub-sub dirs and the library discovers new (already
492 * existing) dir entries */
493 return;
494 case FAMChanged:
495 /* file changed in monitored dir does not modify dir */
496 case FAMDeleted:
497 case FAMMoved:
498 /* file deleted or moved in monitored dir modifies dir,
499 * but FAM provides separate notification for that */
500
501 /* temporarily append filename to dir in fam_dir->name to
502 * construct path, then delete stat_cache entry (if any)*/
503 len = buffer_clen(n);
504 buffer_append_path_len(n, fn, fnlen);
/* (alternatively, could choose to stat() and update)*/
506 stat_cache_invalidate_entry(BUF_PTR_LEN(n));
507
508 fam_link = /*(check if might be symlink to monitored dir)*/
509 stat_cache_sptree_find(&scf->dirs, BUF_PTR_LEN(n));
510 if (fam_link && !buffer_is_equal(&fam_link->name, n))
511 fam_link = NULL;
512
513 buffer_truncate(n, len);
514
515 if (fam_link) {
516 /* replaced symlink changes containing dir */
517 stat_cache_invalidate_entry(n->ptr, len);
518 /* handle symlink to dir as deleted dir below */
519 code = FAMDeleted;
520 fam_dir = fam_link;
521 break;
522 }
523 return;
524 default:
525 return;
526 }
527 }
528
529 switch(code) {
530 case FAMChanged:
531 stat_cache_invalidate_entry(BUF_PTR_LEN(&fam_dir->name));
532 break;
533 case FAMDeleted:
534 case FAMMoved:
535 stat_cache_delete_tree(BUF_PTR_LEN(&fam_dir->name));
536 fam_dir_invalidate_node(fam_dir);
537 if (scf->dirs)
538 fam_dir_invalidate_tree(scf->dirs,
539 BUF_PTR_LEN(&fam_dir->name));
540 fam_dir_periodic_cleanup();
541 break;
542 default:
543 break;
544 }
545 }
546
static handler_t stat_cache_handle_fdevent(void *ctx, int revent)
548 {
549 stat_cache_fam * const scf = ctx; /* sc.scf */
550
551 if (revent & FDEVENT_IN) {
552 stat_cache_handle_fdevent_in(scf);
553 }
554
555 if (revent & (FDEVENT_HUP|FDEVENT_RDHUP)) {
556 /* fam closed the connection */
557 log_error(scf->errh, __FILE__, __LINE__,
558 "FAM connection closed; disabling stat_cache.");
559 /* (although effectively STAT_CACHE_ENGINE_NONE,
560 * do not change here so that periodic jobs clean up memory)*/
561 /*sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE; */
562 fdevent_fdnode_event_del(scf->ev, scf->fdn);
563 fdevent_unregister(scf->ev, scf->fdn);
564 scf->fdn = NULL;
565
566 FAMClose(&scf->fam);
567 scf->fd = -1;
568 }
569
570 return HANDLER_GO_ON;
571 }
572
static stat_cache_fam * stat_cache_init_fam(fdevents *ev, log_error_st *errh) {
574 stat_cache_fam *scf = ck_calloc(1, sizeof(*scf));
575 scf->fd = -1;
576 scf->ev = ev;
577 scf->errh = errh;
578
579 #ifdef HAVE_SYS_INOTIFY_H
580 #if !defined(IN_NONBLOCK) || !defined(IN_CLOEXEC)
581 scf->fd = inotify_init();
582 if (scf->fd >= 0 && 0 != fdevent_fcntl_set_nb_cloexec(scf->fd)) {
583 close(scf->fd);
584 scf->fd = -1;
585 }
586 #else
587 scf->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
588 #endif
589 if (scf->fd < 0) {
590 log_perror(errh, __FILE__, __LINE__, "inotify_init1()");
591 free(scf);
592 return NULL;
593 }
594 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
595 #ifdef __NetBSD__
596 scf->fd = kqueue1(O_NONBLOCK|O_CLOEXEC|O_NOSIGPIPE);
597 #else
598 scf->fd = kqueue();
599 if (scf->fd >= 0) fdevent_setfd_cloexec(scf->fd);
600 #endif
601 if (scf->fd < 0) {
602 log_perror(errh, __FILE__, __LINE__, "kqueue()");
603 free(scf);
604 return NULL;
605 }
606 #else
607 /* setup FAM */
608 if (0 != FAMOpen2(&scf->fam, "lighttpd")) {
609 log_error(errh, __FILE__, __LINE__,
610 "could not open a fam connection, dying.");
611 free(scf);
612 return NULL;
613 }
614 #ifdef HAVE_FAMNOEXISTS
615 #ifdef LIGHTTPD_STATIC
616 FAMNoExists(&scf->fam);
617 #else
618 int (*FAMNoExists_fn)(FAMConnection *);
619 FAMNoExists_fn =
620 (int (*)(FAMConnection *))(intptr_t)dlsym(RTLD_DEFAULT,"FAMNoExists");
621 if (FAMNoExists_fn) FAMNoExists_fn(&scf->fam);
622 #endif
623 #endif
624
625 scf->fd = FAMCONNECTION_GETFD(&scf->fam);
626 fdevent_setfd_cloexec(scf->fd);
627 #endif
628 scf->fdn = fdevent_register(scf->ev, scf->fd, stat_cache_handle_fdevent, scf);
629 fdevent_fdnode_event_set(scf->ev, scf->fdn, FDEVENT_IN | FDEVENT_RDHUP);
630
631 return scf;
632 }
633
static void stat_cache_free_fam(stat_cache_fam *scf) {
635 if (NULL == scf) return;
636
637 #ifdef HAVE_SYS_INOTIFY_H
638 while (scf->wds) {
639 splay_tree *node = scf->wds;
640 scf->wds = splaytree_delete(scf->wds, node->key);
641 }
642 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
643 /*(quicker cleanup to close kqueue() before cancel per entry)*/
644 close(scf->fd);
645 scf->fd = -1;
646 #endif
647 while (scf->dirs) {
648 /*(skip entry invalidation and FAMCancelMonitor())*/
649 splay_tree *node = scf->dirs;
650 fam_dir_entry_free((fam_dir_entry *)node->data);
651 scf->dirs = splaytree_delete(scf->dirs, node->key);
652 }
653
654 if (-1 != scf->fd) {
655 /*scf->fdn already cleaned up in fdevent_free()*/
656 FAMClose(&scf->fam);
657 /*scf->fd = -1;*/
658 }
659
660 free(scf);
661 }
662
static fam_dir_entry * fam_dir_monitor(stat_cache_fam *scf, char *fn, uint32_t dirlen, struct stat *st)
664 {
665 if (NULL == scf->fdn) return NULL; /* FAM connection closed; do nothing */
666 const int fn_is_dir = S_ISDIR(st->st_mode);
667 /*force_assert(0 != dirlen);*/
668 /*force_assert(fn[0] == '/');*/
669 /* consistency: ensure fn does not end in '/' unless root "/"
670 * FAM events will not end in '/', so easier to match this way */
671 if (fn[dirlen-1] == '/') --dirlen;
672 if (0 == dirlen) dirlen = 1; /* root dir ("/") */
673 /* Note: paths are expected to be normalized before calling stat_cache,
674 * e.g. without repeated '/' */
675 if (!fn_is_dir) {
676 while (fn[--dirlen] != '/') ;
677 if (0 == dirlen) dirlen = 1; /*(should not happen for file)*/
678 }
679 int dir_ndx = splaytree_djbhash(fn, dirlen);
680 fam_dir_entry *fam_dir = NULL;
681
682 scf->dirs = splaytree_splay(scf->dirs, dir_ndx);
683 if (NULL != scf->dirs && scf->dirs->key == dir_ndx) {
684 fam_dir = scf->dirs->data;
685 if (!buffer_eq_slen(&fam_dir->name, fn, dirlen)) {
686 /* hash collision; preserve existing
687 * do not monitor new to avoid cache thrashing */
688 return NULL;
689 }
690 /* directory already registered */
691 }
692
693 const unix_time64_t cur_ts = log_monotonic_secs;
694 struct stat lst;
695 int ck_dir = fn_is_dir;
696 if (!fn_is_dir && (NULL==fam_dir || cur_ts - fam_dir->stat_ts >= 16)) {
697 ck_dir = 1;
698 /*(temporarily modify fn)*/
699 fn[dirlen] = '\0';
700 if (0 != lstat(fn, &lst)) {
701 fn[dirlen] = '/';
702 return NULL;
703 }
704 if (!S_ISLNK(lst.st_mode)) {
705 st = &lst;
706 }
707 else if (0 != stat(fn, st)) { /*st passed in now is stat() of dir*/
708 fn[dirlen] = '/';
709 return NULL;
710 }
711 fn[dirlen] = '/';
712 }
713
714 int ck_lnk = (NULL == fam_dir);
715 if (ck_dir && NULL != fam_dir) {
716 /* check stat() matches device and inode, just in case an external event
717 * not being monitored occurs (e.g. rename of unmonitored parent dir)*/
718 if (st->st_dev != fam_dir->st_dev || st->st_ino != fam_dir->st_ino) {
719 ck_lnk = 1;
720 /*(modifies scf->dirs but no need to re-splay for dir_ndx since
721 * fam_dir is not NULL and so splaytree_insert not called below)*/
722 if (scf->dirs) fam_dir_invalidate_tree(scf->dirs, fn, dirlen);
723 if (!fn_is_dir) /*(if dir, caller is updating stat_cache_entry)*/
724 stat_cache_update_entry(fn, dirlen, st, NULL);
725 /*(must not delete tree since caller is holding a valid node)*/
726 stat_cache_invalidate_dir_tree(fn, dirlen);
727 #ifdef HAVE_SYS_INOTIFY_H
728 scf->wds = splaytree_delete(scf->wds, fam_dir->req);
729 #endif
730 if (0 != FAMCancelMonitor(&scf->fam, &fam_dir->req)
731 || 0 != FAMMonitorDirectory(&scf->fam, fam_dir->name.ptr,
732 &fam_dir->req,
733 (void *)(intptr_t)dir_ndx)) {
734 fam_dir->stat_ts = 0; /* invalidate */
735 return NULL;
736 }
737 fam_dir->st_dev = st->st_dev;
738 fam_dir->st_ino = st->st_ino;
739 #ifdef HAVE_SYS_INOTIFY_H
740 scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
741 #endif
742 }
743 fam_dir->stat_ts = cur_ts;
744 }
745
746 if (NULL == fam_dir) {
747 fam_dir = fam_dir_entry_init(fn, dirlen);
748
749 if (0 != FAMMonitorDirectory(&scf->fam,fam_dir->name.ptr,&fam_dir->req,
750 (void *)(intptr_t)dir_ndx)) {
751 #if defined(HAVE_SYS_INOTIFY_H) \
752 || (defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE)
753 log_perror(scf->errh, __FILE__, __LINE__,
754 "monitoring dir failed: %s file: %s",
755 fam_dir->name.ptr, fn);
756 #else
757 log_error(scf->errh, __FILE__, __LINE__,
758 "monitoring dir failed: %s file: %s %s",
759 fam_dir->name.ptr, fn, FamErrlist[FAMErrno]);
760 #endif
761 fam_dir_entry_free(fam_dir);
762 return NULL;
763 }
764
765 scf->dirs = splaytree_insert(scf->dirs, dir_ndx, fam_dir);
766 #ifdef HAVE_SYS_INOTIFY_H
767 scf->wds = splaytree_insert(scf->wds, fam_dir->req, fam_dir);
768 #endif
769 fam_dir->stat_ts= cur_ts;
770 fam_dir->st_dev = st->st_dev;
771 fam_dir->st_ino = st->st_ino;
772 }
773
774 if (ck_lnk) {
775 if (fn_is_dir) {
776 /*(temporarily modify fn)*/
777 char e = fn[dirlen];
778 fn[dirlen] = '\0';
779 if (0 != lstat(fn, &lst)) {
780 fn[dirlen] = e;
781 return NULL;
782 }
783 fn[dirlen] = e;
784 }
785 if (fam_dir->fam_parent) {
786 --fam_dir->fam_parent->refcnt;
787 fam_dir->fam_parent = NULL;
788 }
789 if (S_ISLNK(lst.st_mode)) {
790 fam_dir->fam_parent = fam_dir_monitor(scf, fn, dirlen, &lst);
791 }
792 }
793
794 ++fam_dir->refcnt;
795 return fam_dir;
796 }
797
798 #endif
799
800
801 __attribute_malloc__
802 __attribute_noinline__
803 __attribute_returns_nonnull__
static stat_cache_entry * stat_cache_entry_init(void) {
805 stat_cache_entry *sce = ck_calloc(1, sizeof(*sce));
806 sce->fd = -1;
807 sce->refcnt = 1;
808 return sce;
809 }
810
static void stat_cache_entry_free(void *data) {
812 stat_cache_entry *sce = data;
813 if (!sce) return;
814
815 if (--sce->refcnt) return;
816
817 #ifdef HAVE_FAM_H
818 /*(decrement refcnt only;
819 * defer cancelling FAM monitor on dir even if refcnt reaches zero)*/
820 if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
821 #endif
822
823 free(sce->name.ptr);
824 free(sce->etag.ptr);
825 if (sce->content_type.size) free(sce->content_type.ptr);
826 if (sce->fd >= 0) close(sce->fd);
827
828 free(sce);
829 }
830
void stat_cache_entry_refchg(void *data, int mod) {
832 /*(expect mod == -1 or mod == 1)*/
833 stat_cache_entry * const sce = data;
834 if (mod < 0 && 1 == sce->refcnt)
835 stat_cache_entry_free(data);
836 else
837 sce->refcnt += mod;
838 }
839
840 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
841
842 static const char *attrname = "Content-Type";
843 static char attrval[128];
844 static buffer attrb = { attrval, 0, 0 };
845
static int stat_cache_attr_get(const char *name) {
847 #if defined(HAVE_XATTR)
848 #if defined(HAVE_SYS_XATTR_H)
849 ssize_t attrlen;
850 if (0 < (attrlen = getxattr(name, attrname,
851 attrval, sizeof(attrval)-1)))
852 #else
853 int attrlen = sizeof(attrval)-1;
854 if (0 == attr_get(name, attrname, attrval, &attrlen, 0))
855 #endif
856 #elif defined(HAVE_EXTATTR)
857 ssize_t attrlen;
858 if (0 < (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, attrname,
859 attrval, sizeof(attrval)-1)))
860 #endif
861 {
862 attrval[attrlen] = '\0';
863 attrb.used = (uint32_t)(attrlen + 1);
864 return 1;
865 }
866 return 0;
867 }
868
869 #endif
870
int stat_cache_init(fdevents *ev, log_error_st *errh) {
872 #ifdef HAVE_FAM_H
873 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
874 sc.scf = stat_cache_init_fam(ev, errh);
875 if (NULL == sc.scf) return 0;
876 }
877 #else
878 UNUSED(ev);
879 UNUSED(errh);
880 #endif
881
882 return 1;
883 }
884
void stat_cache_free(void) {
886 splay_tree *sptree = sc.files;
887 while (sptree) {
888 stat_cache_entry_free(sptree->data);
889 sptree = splaytree_delete(sptree, sptree->key);
890 }
891 sc.files = NULL;
892
893 #ifdef HAVE_FAM_H
894 stat_cache_free_fam(sc.scf);
895 sc.scf = NULL;
896 #endif
897
898 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
899 attrname = "Content-Type";
900 #endif
901
902 sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; /*(default)*/
903 }
904
void stat_cache_xattrname (const char *name) {
906 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
907 attrname = name;
908 #else
909 UNUSED(name);
910 #endif
911 }
912
int stat_cache_choose_engine (const buffer *stat_cache_string, log_error_st *errh) {
914 if (buffer_is_blank(stat_cache_string))
915 sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
916 else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("simple")))
917 sc.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE;
918 #ifdef HAVE_SYS_INOTIFY_H
919 else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("inotify")))
920 sc.stat_cache_engine = STAT_CACHE_ENGINE_INOTIFY;
921 /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_INOTIFY)*/
922 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
923 else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("kqueue")))
924 sc.stat_cache_engine = STAT_CACHE_ENGINE_KQUEUE;
925 /*(STAT_CACHE_ENGINE_FAM == STAT_CACHE_ENGINE_KQUEUE)*/
926 #endif
927 #ifdef HAVE_FAM_H
928 else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("fam")))
929 sc.stat_cache_engine = STAT_CACHE_ENGINE_FAM;
930 #endif
931 else if (buffer_eq_slen(stat_cache_string, CONST_STR_LEN("disable"))
932 || buffer_eq_slen(stat_cache_string, CONST_STR_LEN("none")))
933 sc.stat_cache_engine = STAT_CACHE_ENGINE_NONE;
934 else {
935 log_error(errh, __FILE__, __LINE__,
936 "server.stat-cache-engine can be one of \"disable\", \"simple\","
937 #ifdef HAVE_SYS_INOTIFY_H
938 " \"inotify\","
939 #elif defined HAVE_SYS_EVENT_H && defined HAVE_KQUEUE
940 " \"kqueue\","
941 #endif
942 #ifdef HAVE_FAM_H
943 " \"fam\","
944 #endif
945 " but not: %s", stat_cache_string->ptr);
946 return -1;
947 }
948 return 0;
949 }
950
const buffer * stat_cache_mimetype_by_ext(const array * const mimetypes, const char * const name, const uint32_t nlen)
952 {
953 const char * const end = name + nlen; /*(end of string)*/
954 const uint32_t used = mimetypes->used;
955 if (used < 16) {
956 for (uint32_t i = 0; i < used; ++i) {
957 /* suffix match */
958 const data_string *ds = (data_string *)mimetypes->data[i];
959 const size_t klen = buffer_clen(&ds->key);
960 if (klen <= nlen && buffer_eq_icase_ssn(end-klen, ds->key.ptr, klen))
961 return &ds->value;
962 }
963 }
964 else {
965 const char *s;
966 const data_string *ds;
967 if (nlen) {
968 for (s = end-1; s != name && *s != '/'; --s) ; /*(like memrchr())*/
969 if (*s == '/') ++s;
970 }
971 else {
972 s = name;
973 }
974 /* search for basename, then longest .ext2.ext1, then .ext1, then "" */
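/* (e.g. for "archive.tar.gz": look up "archive.tar.gz", then ".tar.gz",
 *  "tar.gz", ".gz", "gz", and finally "" as the catchall) */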
975 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
976 if (NULL != ds) return &ds->value;
977 while (++s < end) {
978 while (*s != '.' && ++s != end) ;
979 if (s == end) break;
980 /* search ".ext" then "ext" */
981 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
982 if (NULL != ds) return &ds->value;
983 /* repeat search without leading '.' to handle situation where
984 * admin configured mimetype.assign keys without leading '.' */
985 if (++s < end) {
986 if (*s == '.') { --s; continue; }
987 ds = (const data_string *)array_get_element_klen(mimetypes, s, end - s);
988 if (NULL != ds) return &ds->value;
989 }
990 }
991 /* search for ""; catchall */
992 ds = (const data_string *)array_get_element_klen(mimetypes, CONST_STR_LEN(""));
993 if (NULL != ds) return &ds->value;
994 }
995
996 return NULL;
997 }
998
999 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1000
const buffer * stat_cache_mimetype_by_xattr(const char * const name)
1002 {
1003 return stat_cache_attr_get(name) ? &attrb : NULL;
1004 }
1005
const buffer * stat_cache_content_type_get_by_xattr(stat_cache_entry *sce, const array *mimetypes, int use_xattr)
1007 {
1008 /*(invalid caching if user config has multiple, different
1009 * r->conf.mimetypes for same extension (not expected))*/
1010 if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1011
1012 if (!S_ISREG(sce->st.st_mode)) return NULL;
1013
1014 /* cache mimetype */
1015 const buffer *mtype =
1016 (use_xattr) ? stat_cache_mimetype_by_xattr(sce->name.ptr) : NULL;
1017 if (NULL == mtype)
1018 mtype = stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1019 if (NULL != mtype) {
1020 if (sce->content_type.size) {
1021 buffer_copy_buffer(&sce->content_type, mtype);
1022 }
1023 else if (mtype == &attrb) {
1024 sce->content_type.ptr = NULL;
1025 buffer_copy_buffer(&sce->content_type, mtype);
1026 }
1027 else {
1028 /*(copy pointers from mimetypes array; avoid allocation)*/
1029 sce->content_type.ptr = mtype->ptr;
1030 sce->content_type.used = mtype->used;
1031 /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1032 }
1033 }
1034 else
1035 buffer_clear(&sce->content_type);
1036
1037 return &sce->content_type;
1038 }
1039
1040 #else
1041
const buffer * stat_cache_content_type_get_by_ext(stat_cache_entry *sce, const array *mimetypes)
1043 {
1044 /*(invalid caching if user config has multiple, different
1045 * r->conf.mimetypes for same extension (not expected))*/
1046 if (!buffer_is_blank(&sce->content_type)) return &sce->content_type;
1047
1048 if (!S_ISREG(sce->st.st_mode)) return NULL;
1049
1050 /* cache mimetype */
1051 const buffer * const mtype =
1052 stat_cache_mimetype_by_ext(mimetypes, BUF_PTR_LEN(&sce->name));
1053 if (NULL != mtype) {
1054 /*(copy pointers from mimetypes array; avoid allocation)*/
1055 sce->content_type.ptr = mtype->ptr;
1056 sce->content_type.used = mtype->used;
1057 /*(leave sce->content_type.size = 0 to flag not-allocated)*/
1058 }
1059 else
1060 buffer_clear(&sce->content_type);
1061
1062 return &sce->content_type;
1063 }
1064
1065 #endif
1066
const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) {
1068 /*(invalid caching if user cfg has multiple, different r->conf.etag_flags
1069 * for same path (not expected, since etag flags should be by filesystem))*/
1070 if (!buffer_is_blank(&sce->etag)) return &sce->etag;
1071
1072 if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) {
1073 if (0 == flags) return NULL;
1074 http_etag_create(&sce->etag, &sce->st, flags);
1075 return &sce->etag;
1076 }
1077
1078 return NULL;
1079 }
1080
1081 __attribute_pure__
static int stat_cache_stat_eq(const struct stat * const sta, const struct stat * const stb) {
1083 return
1084 #ifdef st_mtime /* use high-precision timestamp if available */
1085 #if defined(__APPLE__) && defined(__MACH__)
1086 sta->st_mtimespec.tv_nsec == stb->st_mtimespec.tv_nsec
1087 #else
1088 sta->st_mtim.tv_nsec == stb->st_mtim.tv_nsec
1089 #endif
1090 #else
1091 1
1092 #endif
1093 && sta->st_mtime == stb->st_mtime
1094 && sta->st_size == stb->st_size
1095 && sta->st_ino == stb->st_ino
1096 && sta->st_dev == stb->st_dev;
1097 }
1098
void stat_cache_update_entry(const char *name, uint32_t len,
1100 const struct stat *st, const buffer *etagb)
1101 {
1102 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1103 force_assert(0 != len);
1104 if (name[len-1] == '/') { if (0 == --len) len = 1; }
1105 splay_tree **sptree = &sc.files;
1106 stat_cache_entry *sce =
1107 stat_cache_sptree_find(sptree, name, len);
1108 if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1109 if (!stat_cache_stat_eq(&sce->st, st)) {
1110 /* etagb might be NULL to clear etag (invalidate) */
1111 buffer_clear(&sce->etag);
1112 if (etagb)
1113 buffer_copy_string_len(&sce->etag, BUF_PTR_LEN(etagb));
1114 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1115 buffer_clear(&sce->content_type);
1116 #endif
1117 if (sce->fd >= 0) {
1118 if (1 == sce->refcnt) {
1119 close(sce->fd);
1120 sce->fd = -1;
1121 }
1122 else {
1123 --sce->refcnt; /* stat_cache_entry_free(sce); */
1124 (*sptree)->data = sce = stat_cache_entry_init();
1125 buffer_copy_string_len(&sce->name, name, len);
1126 }
1127 }
1128 sce->st = *st;
1129 }
1130 sce->stat_ts = log_monotonic_secs;
1131 }
1132 }
1133
void stat_cache_delete_entry(const char *name, uint32_t len)
1135 {
1136 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_NONE) return;
1137 force_assert(0 != len);
1138 if (name[len-1] == '/') { if (0 == --len) len = 1; }
1139 splay_tree **sptree = &sc.files;
1140 stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1141 if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1142 stat_cache_entry_free(sce);
1143 *sptree = splaytree_delete(*sptree, (*sptree)->key);
1144 }
1145 }
1146
void stat_cache_invalidate_entry(const char *name, uint32_t len)
1148 {
1149 splay_tree **sptree = &sc.files;
1150 stat_cache_entry *sce = stat_cache_sptree_find(sptree, name, len);
1151 if (sce && buffer_is_equal_string(&sce->name, name, len)) {
1152 sce->stat_ts = 0;
1153 #ifdef HAVE_FAM_H
1154 if (sce->fam_dir != NULL) {
1155 --((fam_dir_entry *)sce->fam_dir)->refcnt;
1156 sce->fam_dir = NULL;
1157 }
1158 #endif
1159 }
1160 }
1161
1162 #ifdef HAVE_FAM_H
1163
static void stat_cache_invalidate_dir_tree_walk(splay_tree *t,
1165 const char *name, size_t len)
1166 {
1167 if (t->left) stat_cache_invalidate_dir_tree_walk(t->left, name, len);
1168 if (t->right) stat_cache_invalidate_dir_tree_walk(t->right, name, len);
1169
1170 const buffer * const b = &((stat_cache_entry *)t->data)->name;
1171 const size_t blen = buffer_clen(b);
1172 if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len)) {
1173 stat_cache_entry *sce = t->data;
1174 sce->stat_ts = 0;
1175 if (sce->fam_dir != NULL) {
1176 --((fam_dir_entry *)sce->fam_dir)->refcnt;
1177 sce->fam_dir = NULL;
1178 }
1179 }
1180 }
1181
static void stat_cache_invalidate_dir_tree(const char *name, size_t len)
1183 {
1184 splay_tree * const sptree = sc.files;
1185 if (sptree) stat_cache_invalidate_dir_tree_walk(sptree, name, len);
1186 }
1187
1188 #endif
1189
1190 /*
 * walk through splay_tree and collect contents of dir tree.
1192 * remove tagged entries in a second loop
1193 */
1194
static void stat_cache_tag_dir_tree(splay_tree *t, const char *name, size_t len,
1196 int *keys, int *ndx)
1197 {
1198 if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1199 if (t->left) stat_cache_tag_dir_tree(t->left, name, len, keys, ndx);
1200 if (t->right) stat_cache_tag_dir_tree(t->right, name, len, keys, ndx);
1201 if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1202
1203 const buffer * const b = &((stat_cache_entry *)t->data)->name;
1204 const size_t blen = buffer_clen(b);
1205 if (blen > len && b->ptr[len] == '/' && 0 == memcmp(b->ptr, name, len))
1206 keys[(*ndx)++] = t->key;
1207 }
1208
1209 __attribute_noinline__
static void stat_cache_prune_dir_tree(const char *name, size_t len)
1211 {
1212 splay_tree *sptree = sc.files;
1213 int max_ndx, i;
1214 int keys[8192]; /* 32k size on stack */
1215 do {
1216 if (!sptree) break;
1217 max_ndx = 0;
1218 stat_cache_tag_dir_tree(sptree, name, len, keys, &max_ndx);
1219 for (i = 0; i < max_ndx; ++i) {
1220 const int ndx = keys[i];
1221 splay_tree *node = sptree = splaytree_splay(sptree, ndx);
1222 if (node && node->key == ndx) {
1223 stat_cache_entry_free(node->data);
1224 sptree = splaytree_delete(sptree, ndx);
1225 }
1226 }
1227 } while (max_ndx == sizeof(keys)/sizeof(int));
1228 sc.files = sptree;
1229 }
1230
static void stat_cache_delete_tree(const char *name, uint32_t len)
1232 {
1233 stat_cache_delete_entry(name, len);
1234 stat_cache_prune_dir_tree(name, len);
1235 }
1236
void stat_cache_delete_dir(const char *name, uint32_t len)
1238 {
1239 force_assert(0 != len);
1240 if (name[len-1] == '/') { if (0 == --len) len = 1; }
1241 stat_cache_delete_tree(name, len);
1242 #ifdef HAVE_FAM_H
1243 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1244 splay_tree **sptree = &sc.scf->dirs;
1245 fam_dir_entry *fam_dir = stat_cache_sptree_find(sptree, name, len);
1246 if (fam_dir && buffer_eq_slen(&fam_dir->name, name, len))
1247 fam_dir_invalidate_node(fam_dir);
1248 if (*sptree) fam_dir_invalidate_tree(*sptree, name, len);
1249 fam_dir_periodic_cleanup();
1250 }
1251 #endif
1252 }
1253
/***
 *
 * returns:
 * - (stat_cache_entry *) on success, with current stat() info cached
 * - NULL on failure, e.g. stat() failed -> see errno for the problem
 */
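/* (typical caller pattern -- an illustrative sketch; "path" and
 *  "etag_flags" below are placeholder names:
 *    stat_cache_entry * const sce = stat_cache_get_entry(path);
 *    if (NULL == sce) { ...inspect errno... }
 *    else { ...use sce->st, stat_cache_etag_get(sce, etag_flags), ... }
 * )*/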
1262
stat_cache_entry * stat_cache_get_entry(const buffer * const name) {
1264 stat_cache_entry *sce = NULL;
1265
1266 /* consistency: ensure lookup name does not end in '/' unless root "/"
1267 * (but use full path given with stat(), even with trailing '/') */
1268 int final_slash = 0;
1269 size_t len = buffer_clen(name);
1270 force_assert(0 != len);
1271 if (name->ptr[len-1] == '/') { final_slash = 1; if (0 == --len) len = 1; }
1272 /* Note: paths are expected to be normalized before calling stat_cache,
1273 * e.g. without repeated '/' */
1274
1275 if (name->ptr[0] != '/') {
1276 errno = EINVAL;
1277 return NULL;
1278 }
1279
1280 /*
1281 * check if the directory for this file has changed
1282 */
1283
1284 const unix_time64_t cur_ts = log_monotonic_secs;
1285
1286 const int file_ndx = splaytree_djbhash(name->ptr, len);
1287 splay_tree *sptree = sc.files = splaytree_splay(sc.files, file_ndx);
1288
1289 if (sptree && (sptree->key == file_ndx)) {
1290 /* we have seen this file already and
1291 * don't stat() it again in the same second */
1292
1293 sce = sptree->data;
1294
1295 /* check if the name is the same, we might have a collision */
1296
1297 if (buffer_is_equal_string(&sce->name, name->ptr, len)) {
1298 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
1299 if (sce->stat_ts == cur_ts) {
1300 if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1301 errno = ENOTDIR;
1302 return NULL;
1303 }
1304 return sce;
1305 }
1306 }
1307 #ifdef HAVE_FAM_H
1308 else if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM
1309 && sce->fam_dir) { /* entry is in monitored dir */
1310 /* re-stat() periodically, even if monitoring for changes
1311 * (due to limitations in stat_cache.c use of FAM)
1312 * (gaps due to not continually monitoring an entire tree) */
1313 if (cur_ts - sce->stat_ts < 16) {
1314 if (final_slash && !S_ISDIR(sce->st.st_mode)) {
1315 errno = ENOTDIR;
1316 return NULL;
1317 }
1318 return sce;
1319 }
1320 }
1321 #endif
1322 } else {
1323 /* collision, forget about the entry */
1324 sce = NULL;
1325 }
1326 }
1327
1328 struct stat st;
1329 if (-1 == stat(name->ptr, &st)) {
1330 return NULL;
1331 }
1332
1333 if (NULL == sce) {
1334
1335 /* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
1336 if (final_slash && S_ISREG(st.st_mode)) {
1337 errno = ENOTDIR;
1338 return NULL;
1339 }
1340
1341 sce = stat_cache_entry_init();
1342 buffer_copy_string_len(&sce->name, name->ptr, len);
1343
1344 /* already splayed file_ndx */
1345 if (NULL != sptree && sptree->key == file_ndx) {
1346 /* hash collision: replace old entry */
1347 stat_cache_entry_free(sptree->data);
1348 sptree->data = sce;
1349 } else {
1350 /*sptree =*/ sc.files = splaytree_insert(sptree, file_ndx, sce);
1351 }
1352
1353 } else {
1354
1355 buffer_clear(&sce->etag);
1356 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
1357 buffer_clear(&sce->content_type);
1358 #endif
1359
1360 /* close fd if file changed */
1361 if (sce->fd >= 0 && !stat_cache_stat_eq(&sce->st, &st)) {
1362 if (1 == sce->refcnt) {
1363 close(sce->fd);
1364 sce->fd = -1;
1365 }
1366 else {
1367 --sce->refcnt; /* stat_cache_entry_free(sce); */
1368 sptree->data = sce = stat_cache_entry_init();
1369 buffer_copy_string_len(&sce->name, name->ptr, len);
1370 }
1371 }
1372 }
1373
1374 sce->st = st; /*(copy prior to calling fam_dir_monitor())*/
1375
1376 #ifdef HAVE_FAM_H
1377 if (sc.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
1378 if (sce->fam_dir) --((fam_dir_entry *)sce->fam_dir)->refcnt;
1379 sce->fam_dir =
1380 fam_dir_monitor(sc.scf, name->ptr, len, &st);
1381 #if 0 /*(performed below)*/
1382 if (NULL != sce->fam_dir) {
1383 /*(may have been invalidated by dir change)*/
1384 sce->stat_ts = cur_ts;
1385 }
1386 #endif
1387 }
1388 #endif
1389
1390 sce->stat_ts = cur_ts;
1391 return sce;
1392 }
1393
stat_cache_entry * stat_cache_get_entry_open(const buffer * const name, const int symlinks) {
1395 stat_cache_entry * const sce = stat_cache_get_entry(name);
1396 if (NULL == sce) return NULL;
1397 if (sce->fd >= 0) return sce;
1398 if (sce->st.st_size > 0) {
1399 sce->fd = stat_cache_open_rdonly_fstat(name, &sce->st, symlinks);
1400 buffer_clear(&sce->etag);
1401 }
1402 return sce; /* (note: sce->fd might still be -1 if open() failed) */
1403 }
1404
const stat_cache_st * stat_cache_path_stat (const buffer * const name) {
1406 const stat_cache_entry * const sce = stat_cache_get_entry(name);
1407 return sce ? &sce->st : NULL;
1408 }
1409
int stat_cache_path_isdir(const buffer *name) {
1411 const stat_cache_entry * const sce = stat_cache_get_entry(name);
1412 return (sce && (S_ISDIR(sce->st.st_mode) ? 1 : (errno = ENOTDIR, 0)));
1413 }
1414
int stat_cache_path_contains_symlink(const buffer *name, log_error_st *errh) {
1416 /* caller should check for symlinks only if we should block symlinks. */
1417
1418 /* catch the obvious symlinks
1419 *
1420 * this is not a secure check as we still have a race-condition between
1421 * the stat() and the open. We can only solve this by
1422 * 1. open() the file
1423 * 2. fstat() the fd
1424 *
1425 * and keeping the file open for the rest of the time. But this can
1426 * only be done at network level.
1427 * */
1428
1429 #ifdef HAVE_LSTAT
/* we assume "/" cannot be a symlink,
 * so skip the symlink check if the path is "/" */
1432 size_t len = buffer_clen(name);
1433 force_assert(0 != len);
1434 force_assert(name->ptr[0] == '/');
1435 if (1 == len) return 0;
1436 #ifndef PATH_MAX
1437 #define PATH_MAX 4096
1438 #endif
1439 if (len >= PATH_MAX) return -1;
1440
1441 char buf[PATH_MAX];
1442 memcpy(buf, name->ptr, len);
1443 char *s_cur = buf+len;
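/* (e.g. for "/srv/www/file" this loop lstat()s "/srv/www/file", then
 *  "/srv/www", then "/srv"; "/" itself is assumed not to be a symlink) */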
1444 do {
1445 *s_cur = '\0';
1446 struct stat st;
1447 if (0 == lstat(buf, &st)) {
1448 if (S_ISLNK(st.st_mode)) return 1;
1449 }
1450 else {
1451 log_perror(errh, __FILE__, __LINE__, "lstat failed for: %s", buf);
1452 return -1;
1453 }
1454 } while ((s_cur = strrchr(buf, '/')) > buf); /*(&buf[0]==buf; NULL < buf)*/
1455 #endif
1456
1457 return 0;
1458 }
1459
int stat_cache_open_rdonly_fstat (const buffer *name, struct stat *st, int symlinks) {
1461 /*(Note: O_NOFOLLOW affects only the final path segment, the target file,
1462 * not any intermediate symlinks along the path)*/
1463 const int fd = fdevent_open_cloexec(name->ptr, symlinks, O_RDONLY, 0);
1464 if (fd >= 0) {
1465 if (0 == fstat(fd, st)) {
1466 return fd;
1467 } else {
1468 const int errnum = errno;
1469 close(fd);
1470 errno = errnum;
1471 }
1472 }
1473 return -1;
1474 }
1475
1476 /**
 * remove entries from the stat-cache which haven't been stat()ed for
 * more than max_age seconds (2 by default; 32 when FAM-style monitoring
 * is enabled)
 *
 * walk through the stat-cache, collect the keys which are too old
 * and remove them in a second loop
1483 */
1484
static void stat_cache_tag_old_entries(splay_tree * const t, int * const keys, int * const ndx, const time_t max_age, const unix_time64_t cur_ts) {
1486 if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1487 if (t->left)
1488 stat_cache_tag_old_entries(t->left, keys, ndx, max_age, cur_ts);
1489 if (t->right)
1490 stat_cache_tag_old_entries(t->right, keys, ndx, max_age, cur_ts);
1491 if (*ndx == 8192) return; /*(must match num array entries in keys[])*/
1492
1493 const stat_cache_entry * const sce = t->data;
1494 if (cur_ts - sce->stat_ts > max_age)
1495 keys[(*ndx)++] = t->key;
1496 }
1497
static void stat_cache_periodic_cleanup(const time_t max_age, const unix_time64_t cur_ts) {
1499 splay_tree *sptree = sc.files;
1500 int max_ndx, i;
1501 int keys[8192]; /* 32k size on stack */
1502 do {
1503 if (!sptree) break;
1504 max_ndx = 0;
1505 stat_cache_tag_old_entries(sptree, keys, &max_ndx, max_age, cur_ts);
1506 for (i = 0; i < max_ndx; ++i) {
1507 int ndx = keys[i];
1508 sptree = splaytree_splay(sptree, ndx);
1509 if (sptree && sptree->key == ndx) {
1510 stat_cache_entry_free(sptree->data);
1511 sptree = splaytree_delete(sptree, ndx);
1512 }
1513 }
1514 } while (max_ndx == sizeof(keys)/sizeof(int));
1515 sc.files = sptree;
1516 }
1517
void stat_cache_trigger_cleanup(void) {
1519 time_t max_age = 2;
1520
1521 #ifdef HAVE_FAM_H
1522 if (STAT_CACHE_ENGINE_FAM == sc.stat_cache_engine) {
1523 if (log_monotonic_secs & 0x1F) return;
1524 /* once every 32 seconds (0x1F == 31) */
1525 max_age = 32;
1526 fam_dir_periodic_cleanup();
1527 /* By doing this before stat_cache_periodic_cleanup(),
1528 * entries used within the next max_age secs will remain
1529 * monitored, instead of effectively flushing and
1530 * rebuilding the FAM monitoring every max_age seconds */
1531 }
1532 #endif
1533
1534 stat_cache_periodic_cleanup(max_age, log_monotonic_secs);
1535 }
1536