xref: /lighttpd1.4/src/mod_accesslog.c (revision 708211e2)
1 #include "first.h"
2 
3 #include "sys-time.h"
4 
5 #include "base.h"
6 #include "fdevent.h"
7 #include "fdlog.h"
8 #include "log.h"
9 #include "buffer.h"
10 #include "http_header.h"
11 #include "response.h"
12 #include "sock_addr.h"
13 
14 #include "plugin.h"
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 
19 #include <stdlib.h>
20 #include <string.h>
21 #include <fcntl.h>
22 #include <unistd.h>
23 
24 #ifdef HAVE_SYSLOG_H
25 # include <syslog.h>
26 #endif
27 
28 typedef struct {
29 	char key;
30 	enum {
31 			FORMAT_UNSET,
32 			FORMAT_LITERAL,
33 			FORMAT_HEADER,
34 			FORMAT_RESPONSE_HEADER,
35 			FORMAT_ENV,
36 			FORMAT_TIMESTAMP,
37 			FORMAT_TIME_USED,
38 			FORMAT_REMOTE_ADDR,
39 			FORMAT_HTTP_HOST,
40 			FORMAT_REQUEST_LINE,
41 			FORMAT_STATUS,
42 			FORMAT_BYTES_OUT_NO_HEADER,
43 			FORMAT_BYTES_OUT,
44 			FORMAT_BYTES_IN,
45 			FORMAT_SERVER_NAME,
46 			FORMAT_REQUEST_PROTOCOL,
47 			FORMAT_REQUEST_METHOD,
48 			FORMAT_COOKIE,
49 
50 			FORMAT_SERVER_PORT,
51 			FORMAT_LOCAL_ADDR,
52 			FORMAT_KEEPALIVE_COUNT,
53 			FORMAT_URL,
54 			FORMAT_QUERY_STRING,
55 			FORMAT_FILENAME,
56 			FORMAT_CONNECTION_STATUS,
57 			FORMAT_NOTE,        /* same as FORMAT_ENV */
58 			FORMAT_REMOTE_HOST, /* same as FORMAT_REMOTE_ADDR */
59 			FORMAT_REMOTE_USER, /* redirected to FORMAT_ENV */
60 			FORMAT_TIME_USED_US,/* redirected to FORMAT_TIME_USED */
61 			/*(parsed and replaced at startup)*/
62 			FORMAT_REMOTE_IDENT,
63 			FORMAT_PERCENT,
64 
65 			FORMAT_UNSUPPORTED
66 	} tag;
67 } format_mapping;
68 
69 /**
70  *
71  *
72  * "%h %V %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
73  *
74  */
75 
76 static const format_mapping fmap[] =
77 {
78 	{ '%', FORMAT_PERCENT },      /*(parsed and replaced at startup)*/
79 	{ 'a', FORMAT_REMOTE_ADDR },
80 	{ 'A', FORMAT_LOCAL_ADDR },
81 	{ 'b', FORMAT_BYTES_OUT_NO_HEADER },
82 	{ 'B', FORMAT_BYTES_OUT_NO_HEADER },
83 	{ 'C', FORMAT_COOKIE },
84 	{ 'D', FORMAT_TIME_USED_US },
85 	{ 'e', FORMAT_ENV },
86 	{ 'f', FORMAT_FILENAME },
87 	{ 'h', FORMAT_REMOTE_HOST },  /*(parsed and redirected at startup)*/
88 	{ 'H', FORMAT_REQUEST_PROTOCOL },
89 	{ 'i', FORMAT_HEADER },
90 	{ 'I', FORMAT_BYTES_IN },
91 	{ 'k', FORMAT_KEEPALIVE_COUNT },
92 	{ 'l', FORMAT_REMOTE_IDENT }, /*(parsed and replaced at startup)*/
93 	{ 'm', FORMAT_REQUEST_METHOD },
94 	{ 'n', FORMAT_NOTE },         /*(parsed and redirected at startup)*/
95 	{ 'o', FORMAT_RESPONSE_HEADER },
96 	{ 'O', FORMAT_BYTES_OUT },
97 	{ 'p', FORMAT_SERVER_PORT },
98 	{ 'P', FORMAT_UNSUPPORTED }, /* we are only one process */
99 	{ 'q', FORMAT_QUERY_STRING },
100 	{ 'r', FORMAT_REQUEST_LINE },
101 	{ 's', FORMAT_STATUS },
102 	{ 't', FORMAT_TIMESTAMP },
103 	{ 'T', FORMAT_TIME_USED },
104 	{ 'u', FORMAT_REMOTE_USER },  /*(parsed and redirected at startup)*/
105 	{ 'U', FORMAT_URL }, /* w/o querystring */
106 	{ 'v', FORMAT_SERVER_NAME },
107 	{ 'V', FORMAT_HTTP_HOST },
108 	{ 'X', FORMAT_CONNECTION_STATUS },
109 
110 	{ '\0', FORMAT_UNSET }
111 };
112 
113 
114 enum e_optflags_time {
115 	/* format string is passed to strftime unless other format optflags set
116 	 * (besides FORMAT_FLAG_TIME_BEGIN or FORMAT_FLAG_TIME_END) */
117 	FORMAT_FLAG_TIME_END       = 0x00,/* use request end time (default) */
118 	FORMAT_FLAG_TIME_BEGIN     = 0x01,/* use request start time */
119 	FORMAT_FLAG_TIME_SEC       = 0x02,/* request time as num  sec since epoch */
120 	FORMAT_FLAG_TIME_MSEC      = 0x04,/* request time as num msec since epoch */
121 	FORMAT_FLAG_TIME_USEC      = 0x08,/* request time as num usec since epoch */
122 	FORMAT_FLAG_TIME_NSEC      = 0x10,/* request time as num nsec since epoch */
123 	FORMAT_FLAG_TIME_MSEC_FRAC = 0x20,/* request time msec fraction */
124 	FORMAT_FLAG_TIME_USEC_FRAC = 0x40,/* request time usec fraction */
125 	FORMAT_FLAG_TIME_NSEC_FRAC = 0x80 /* request time nsec fraction */
126 };
127 
128 enum e_optflags_port {
129 	FORMAT_FLAG_PORT_LOCAL     = 0x01,/* (default) */
130 	FORMAT_FLAG_PORT_REMOTE    = 0x02
131 };
132 
133 
134 typedef struct {
135     int field;
136     int opt;
137     buffer string;
138 } format_field;
139 
140 typedef struct {
141     unix_time64_t last_generated_accesslog_ts;
142     buffer ts_accesslog_str;
143   #if defined(__STDC_VERSION__) && __STDC_VERSION__-0 >= 199901L /* C99 */
144     format_field ptr[];  /* C99 VLA */
145   #else
146     format_field ptr[1];
147   #endif
148 } format_fields;
149 
150 typedef struct {
151 	fdlog_st *fdlog;
152 	char use_syslog; /* syslog has global buffer */
153 	uint8_t escaping;
154 	unsigned short syslog_level;
155 
156 	format_fields *parsed_format;
157 } plugin_config;
158 
159 typedef struct {
160     PLUGIN_DATA;
161     plugin_config defaults;
162     plugin_config conf;
163 
164     format_fields *default_format;/* allocated if default format */
165 } plugin_data;
166 
167 typedef void(esc_fn_t)(buffer * restrict b, const char * restrict s, size_t len);
168 
169 typedef enum {
170     BS_ESCAPE_DEFAULT
171    ,BS_ESCAPE_JSON
172 } buffer_bs_escape_t;
173 
INIT_FUNC(mod_accesslog_init)174 INIT_FUNC(mod_accesslog_init) {
175     return ck_calloc(1, sizeof(plugin_data));
176 }
177 
178 __attribute_cold__
accesslog_parse_format_err(log_error_st * errh,const char * file,unsigned int line,format_field * f,const char * msg)179 static format_fields * accesslog_parse_format_err(log_error_st *errh, const char *file, unsigned int line, format_field *f, const char *msg) {
180     log_error(errh, file, line, "%s", msg);
181     for (; f->field != FORMAT_UNSET; ++f) free(f->string.ptr);
182     return NULL;
183 }
184 
accesslog_parse_format_token(const char c)185 static int accesslog_parse_format_token (const char c) {
186     const format_mapping *p = fmap;
187     while (p->key != c && p->key != '\0') ++p;
188     return p->tag;
189 }
190 
accesslog_parse_format(const char * const format,const uint32_t flen,log_error_st * const errh)191 static format_fields * accesslog_parse_format(const char * const format, const uint32_t flen, log_error_st * const errh) {
192     /* common log format (the default) results in 18 elements,
193      * so 127 should be enough except for obscene custom usage */
194     uint32_t used = 0;
195     const uint32_t sz = 127;/* (sz+1 must match fptr[] num elts below) */
196     format_field *f;
197     format_field fptr[128]; /* (128 elements takes 4k on stack in 64-bit) */
198     memset(fptr, 0, sizeof(fptr));
199     /*assert(FORMAT_UNSET == 0);*/
200     if (0 == flen) return NULL;
201     uint32_t i = 0;
202     do {
203         uint32_t start = i;
204         while (format[i] != '%' && ++i < flen) ;
205         if (start != i) {
206             /* save string from start to i */
207             if (used && (f = fptr+used-1)->field == FORMAT_LITERAL)
208                 buffer_append_string_len(&f->string, format + start, i - start);
209             else {
210                 if (used == sz)
211                     return accesslog_parse_format_err(errh, __FILE__, __LINE__, fptr,
212                              "too many fields (>= 127) in accesslog.format");
213 
214                 f = fptr + used++;
215                 f->field = FORMAT_LITERAL;
216                 buffer_copy_string_len(&f->string, format + start, i - start);
217             }
218         }
219         if (i == flen) break;
220 
221         uint32_t k = ++i; /* step over '%' */
222         if (i == flen)
223             return accesslog_parse_format_err(errh, __FILE__, __LINE__, fptr,
224               "% must be followed by a format-specifier");
225 
226         /* we need a new field */
227         if (used == sz)
228             return accesslog_parse_format_err(errh, __FILE__, __LINE__, fptr,
229               "too many fields (>= 127) in accesslog.format");
230 
231         /* search for the terminating command */
232         if (format[i] == '{') {
233             k = ++i;
234             while (i < flen && format[i] != '}') ++i;
235             if (i == flen || i == k) {
236                 return accesslog_parse_format_err(errh,__FILE__,__LINE__,fptr,
237                   "%{...} must contain string and %{ must be terminated by }");
238             }
239             ++i;
240         }
241         else {
242             /* skip over (ignore) '<' or '>' */
243             if (format[i] == '<' || format[i] == '>') k = ++i;
244 
245             /* special-case "%%" and "%l" */
246             if (i < flen && (format[i] == '%' || format[i] == 'l')) {
247                 /* replace "%%" with literal '%' */
248                 /* replace "%l" with literal '-'; ignore remote ident */
249                 if (0 == used || (f = fptr+used-1)->field != FORMAT_LITERAL) {
250                     f = fptr + used++;
251                     f->field = FORMAT_LITERAL;
252                 }
253                 buffer_append_char(&f->string, format[i] == '%' ? '%' : '-');
254                 continue;
255             }
256         }
257         /* add field */
258         f = fptr + used++;
259         if (i != k) /* %{...} */
260             buffer_copy_string_len(&f->string, format + k, i - k - 1);
261         f->field = (i < flen)
262           ? accesslog_parse_format_token(format[i])
263           : FORMAT_UNSET;
264 
265         if (f->field == FORMAT_UNSET)
266             return accesslog_parse_format_err(errh, __FILE__, __LINE__, fptr,
267               "% or %{...} must be followed by a valid format-specifier");
268     } while (++i < flen);
269 
270     format_fields * const fields =
271       ck_malloc(sizeof(format_fields) + ((used+1) * sizeof(format_field)));
272     memset(fields, 0, sizeof(format_fields));
273     memcpy(fields->ptr, fptr, (used+1) * sizeof(format_field));
274     return fields;
275 }
276 
mod_accesslog_free_format_fields(format_fields * const ff)277 static void mod_accesslog_free_format_fields(format_fields * const ff) {
278     for (format_field *f = ff->ptr; f->field != FORMAT_UNSET; ++f)
279         free(f->string.ptr);
280     free(ff->ts_accesslog_str.ptr);
281     free(ff);
282 }
283 
FREE_FUNC(mod_accesslog_free)284 FREE_FUNC(mod_accesslog_free) {
285     plugin_data * const p = p_d;
286     if (NULL == p->cvlist) return;
287     /* (init i to 0 if global context; to 1 to skip empty global context) */
288     for (int i = !p->cvlist[0].v.u2[1], used = p->nconfig; i < used; ++i) {
289         config_plugin_value_t *cpv = p->cvlist + p->cvlist[i].v.u2[0];
290         for (; -1 != cpv->k_id; ++cpv) {
291             if (cpv->vtype != T_CONFIG_LOCAL || NULL == cpv->v.v) continue;
292             switch (cpv->k_id) {
293               case 0: /* accesslog.filename */
294                 /*(handled by fdlog_closeall())*/
295                 break;
296               case 1: /* accesslog.format */
297                 mod_accesslog_free_format_fields(cpv->v.v);
298                 break;
299               default:
300                 break;
301             }
302         }
303     }
304 
305     if (NULL != p->default_format) {
306         mod_accesslog_free_format_fields(p->default_format);
307     }
308 }
309 
mod_accesslog_merge_config_cpv(plugin_config * const pconf,const config_plugin_value_t * const cpv)310 static void mod_accesslog_merge_config_cpv(plugin_config * const pconf, const config_plugin_value_t * const cpv) {
311     switch (cpv->k_id) { /* index into static config_plugin_keys_t cpk[] */
312       case 0:{/* accesslog.filename */
313         if (cpv->vtype != T_CONFIG_LOCAL) break;
314         pconf->fdlog = cpv->v.v;
315         break;
316       }
317       case 1:{/* accesslog.format */
318         if (cpv->vtype != T_CONFIG_LOCAL) break;
319         pconf->parsed_format = cpv->v.v;
320         break;
321       }
322       case 2: /* accesslog.use-syslog */
323         pconf->use_syslog = (int)cpv->v.u;
324         break;
325       case 3: /* accesslog.syslog-level */
326         pconf->syslog_level = cpv->v.shrt;
327         break;
328       case 4: /* accesslog.escaping */
329         if (cpv->vtype != T_CONFIG_LOCAL) break;
330         pconf->escaping = (int)cpv->v.u;
331         break;
332       default:/* should not happen */
333         return;
334     }
335 }
336 
mod_accesslog_merge_config(plugin_config * const pconf,const config_plugin_value_t * cpv)337 static void mod_accesslog_merge_config(plugin_config * const pconf, const config_plugin_value_t *cpv) {
338     do {
339         mod_accesslog_merge_config_cpv(pconf, cpv);
340     } while ((++cpv)->k_id != -1);
341 }
342 
mod_accesslog_patch_config(request_st * const r,plugin_data * const p)343 static void mod_accesslog_patch_config(request_st * const r, plugin_data * const p) {
344     memcpy(&p->conf, &p->defaults, sizeof(plugin_config));
345     for (int i = 1, used = p->nconfig; i < used; ++i) {
346         if (config_check_cond(r, (uint32_t)p->cvlist[i].k_id))
347             mod_accesslog_merge_config(&p->conf, p->cvlist + p->cvlist[i].v.u2[0]);
348     }
349 }
350 
351 static format_fields * mod_accesslog_process_format(const char * const format, const uint32_t flen, server * const srv);
352 
SETDEFAULTS_FUNC(mod_accesslog_set_defaults)353 SETDEFAULTS_FUNC(mod_accesslog_set_defaults) {
354     static const config_plugin_keys_t cpk[] = {
355       { CONST_STR_LEN("accesslog.filename"),
356         T_CONFIG_STRING,
357         T_CONFIG_SCOPE_CONNECTION }
358      ,{ CONST_STR_LEN("accesslog.format"),
359         T_CONFIG_STRING,
360         T_CONFIG_SCOPE_CONNECTION }
361      ,{ CONST_STR_LEN("accesslog.use-syslog"),
362         T_CONFIG_BOOL,
363         T_CONFIG_SCOPE_CONNECTION }
364      ,{ CONST_STR_LEN("accesslog.syslog-level"),
365         T_CONFIG_SHORT,
366         T_CONFIG_SCOPE_CONNECTION }
367      ,{ CONST_STR_LEN("accesslog.escaping"),
368         T_CONFIG_STRING,
369         T_CONFIG_SCOPE_CONNECTION }
370      ,{ NULL, 0,
371         T_CONFIG_UNSET,
372         T_CONFIG_SCOPE_UNSET }
373     };
374 
375     plugin_data * const p = p_d;
376     if (!config_plugin_values_init(srv, p, cpk, "mod_accesslog"))
377         return HANDLER_ERROR;
378 
379     /* process and validate config directives
380      * (init i to 0 if global context; to 1 to skip empty global context) */
381     for (int i = !p->cvlist[0].v.u2[1]; i < p->nconfig; ++i) {
382         config_plugin_value_t *cpv = p->cvlist + p->cvlist[i].v.u2[0];
383         int use_syslog = 0;
384         config_plugin_value_t *cpvfile = NULL;
385         for (; -1 != cpv->k_id; ++cpv) {
386             switch (cpv->k_id) {
387               case 0: /* accesslog.filename */
388                 if (!buffer_is_blank(cpv->v.b))
389                     cpvfile = cpv;
390                 else {
391                     cpv->v.v = NULL;
392                     cpv->vtype = T_CONFIG_LOCAL;
393                 }
394                 break;
395               case 1: /* accesslog.format */
396                 if (NULL != strchr(cpv->v.b->ptr, '\\')) {
397                     /* process basic backslash-escapes in format string */
398                     buffer *b;
399                     *(const buffer **)&b = cpv->v.b;
400                     char *t = b->ptr;
401                     for (char *s = t; *s; ++s) {
402                         if (s[0] != '\\') { *t++ = *s; continue; }
403                         if (s[1] == '\0') continue; /*(ignore dangling '\\')*/
404                         switch (*++s) {
405                           case 'a': *t++ = '\a'; break; /* bell */
406                           case 'b': *t++ = '\b'; break; /* backspace */
407                           case 'f': *t++ = '\f'; break; /* form feed */
408                           case 'n': *t++ = '\n'; break; /* newline */
409                           case 'r': *t++ = '\r'; break; /* carriage return */
410                           case 't': *t++ = '\t'; break; /* horizontal tab */
411                           case 'v': *t++ = '\v'; break; /* vertical tab */
412                           /*case '"':*/
413                           /*case '\\':*/
414                           default:  *t++ = *s;   break; /*(use literal char)*/
415                         }
416                     }
417                     buffer_truncate(b, (size_t)(t - b->ptr));
418                 }
419                 cpv->v.v =
420                   mod_accesslog_process_format(BUF_PTR_LEN(cpv->v.b), srv);
421                 if (NULL == cpv->v.v) return HANDLER_ERROR;
422                 cpv->vtype = T_CONFIG_LOCAL;
423                 break;
424               case 2: /* accesslog.use-syslog */
425                 use_syslog = (int)cpv->v.u;
426                 break;
427               case 3: /* accesslog.syslog-level */
428                 break;
429               case 4: /* accesslog.escaping */
430                 /* quick parse: 0 == "default", 1 == "json" */
431                 cpv->v.u = (0 == strcmp(cpv->v.b->ptr, "json"))
432                   ? BS_ESCAPE_JSON
433                   : BS_ESCAPE_DEFAULT;
434                 cpv->vtype = T_CONFIG_LOCAL;
435                 break;
436               default:/* should not happen */
437                 break;
438             }
439         }
440 
441         if (srv->srvconf.preflight_check) continue;
442 
443         if (use_syslog) continue; /* ignore the next checks */
444         cpv = cpvfile; /* accesslog.filename handled after preflight_check */
445         if (NULL == cpv) continue;
446         const char * const fn = cpv->v.b->ptr;
447         cpv->v.v = fdlog_open(fn);
448         cpv->vtype = T_CONFIG_LOCAL;
449         if (NULL == cpv->v.v) {
450             log_perror(srv->errh, __FILE__, __LINE__,
451               "opening log '%s' failed", fn);
452             return HANDLER_ERROR;
453         }
454     }
455 
456     p->defaults.syslog_level = LOG_INFO;
457 
458     /* initialize p->defaults from global config context */
459     if (p->nconfig > 0 && p->cvlist->v.u2[1]) {
460         const config_plugin_value_t *cpv = p->cvlist + p->cvlist->v.u2[0];
461         if (-1 != cpv->k_id)
462             mod_accesslog_merge_config(&p->defaults, cpv);
463     }
464 
465     if (NULL == p->defaults.parsed_format) {
466         /* (set default format even if p->use_syslog since
467          *  some other condition might enable logfile) */
468         static const char fmt[] =
469           "%h %V %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"";
470         p->defaults.parsed_format = p->default_format =
471           mod_accesslog_process_format(CONST_STR_LEN(fmt), srv);
472         if (NULL == p->default_format) return HANDLER_ERROR;
473     }
474 
475     return HANDLER_GO_ON;
476 }
477 
mod_accesslog_process_format(const char * const format,const uint32_t flen,server * const srv)478 static format_fields * mod_accesslog_process_format(const char * const format, const uint32_t flen, server * const srv) {
479 			format_fields * const parsed_format =
480 			  accesslog_parse_format(format, flen, srv->errh);
481 			if (NULL == parsed_format) {
482 				log_error(srv->errh, __FILE__, __LINE__,
483 					"parsing accesslog-definition failed: %s", format);
484 				return NULL;
485 			}
486 
487 			uint32_t tcount = 0;
488 			for (format_field *f = parsed_format->ptr; f->field != FORMAT_UNSET; ++f) {
489 				buffer * const fstr = &f->string;
490 				if (FORMAT_LITERAL == f->field) continue;
491 				if (FORMAT_TIMESTAMP == f->field) {
492 					if (!buffer_is_blank(fstr)) {
493 						const char * const ptr = fstr->ptr;
494 						uint32_t len = buffer_clen(fstr);
495 						if (0 == strncmp(ptr, "begin:", sizeof("begin:")-1)) {
496 							f->opt |= FORMAT_FLAG_TIME_BEGIN;
497 							memmove(fstr->ptr, fstr->ptr+6, len-6); /*"begin:"*/
498 							buffer_truncate(fstr, len-6);
499 						} else if (0 == strncmp(ptr, "end:", sizeof("end:")-1)) {
500 							f->opt |= FORMAT_FLAG_TIME_END;
501 							memmove(fstr->ptr, fstr->ptr+4, len-4); /*"end:"*/
502 							buffer_truncate(fstr, len-4);
503 						}
504 						if      (0 == strcmp(ptr, "sec"))       f->opt |= FORMAT_FLAG_TIME_SEC;
505 						else if (0 == strcmp(ptr, "msec"))      f->opt |= FORMAT_FLAG_TIME_MSEC;
506 						else if (0 == strcmp(ptr, "usec"))      f->opt |= FORMAT_FLAG_TIME_USEC;
507 						else if (0 == strcmp(ptr, "nsec"))      f->opt |= FORMAT_FLAG_TIME_NSEC;
508 						else if (0 == strcmp(ptr, "msec_frac")) f->opt |= FORMAT_FLAG_TIME_MSEC_FRAC;
509 						else if (0 == strcmp(ptr, "usec_frac")) f->opt |= FORMAT_FLAG_TIME_USEC_FRAC;
510 						else if (0 == strcmp(ptr, "nsec_frac")) f->opt |= FORMAT_FLAG_TIME_NSEC_FRAC;
511 						else if (NULL == strchr(ptr, '%')) {
512 							log_error(srv->errh, __FILE__, __LINE__,
513 								"constant string for time format (misspelled token? or missing '%%'): %s", format);
514 							mod_accesslog_free_format_fields(parsed_format);
515 							return NULL;
516 						}
517 					}
518 
519 					/* make sure they didn't try to send the timestamp in twice
520 					 * (would invalidate pconf->parsed_format.ts_accesslog_str cache of timestamp str) */
521 					if (!(f->opt & ~(FORMAT_FLAG_TIME_BEGIN|FORMAT_FLAG_TIME_END|FORMAT_FLAG_TIME_SEC)) && ++tcount > 1) {
522 						log_error(srv->errh, __FILE__, __LINE__,
523 							"you may not use strftime timestamp format %%{}t twice in the same access log: %s", format);
524 						mod_accesslog_free_format_fields(parsed_format);
525 						return NULL;
526 					}
527 
528 					if (f->opt & FORMAT_FLAG_TIME_BEGIN) srv->srvconf.high_precision_timestamps = 1;
529 				} else if (FORMAT_TIME_USED_US == f->field) {
530 					f->opt |= FORMAT_FLAG_TIME_USEC;
531 					f->field = FORMAT_TIME_USED;
532 					srv->srvconf.high_precision_timestamps = 1;
533 				} else if (FORMAT_TIME_USED == f->field) {
534 					const char * const ptr = fstr->ptr;
535 					if (buffer_is_blank(fstr)
536 					      || 0 == strcmp(ptr, "s")
537 					      || 0 == strcmp(ptr, "sec"))  f->opt |= FORMAT_FLAG_TIME_SEC;
538 					else if (0 == strcmp(ptr, "ms")
539 					      || 0 == strcmp(ptr, "msec")) f->opt |= FORMAT_FLAG_TIME_MSEC;
540 					else if (0 == strcmp(ptr, "us")
541 					      || 0 == strcmp(ptr, "usec")) f->opt |= FORMAT_FLAG_TIME_USEC;
542 					else if (0 == strcmp(ptr, "ns")
543 					      || 0 == strcmp(ptr, "nsec")) f->opt |= FORMAT_FLAG_TIME_NSEC;
544 					else {
545 						log_error(srv->errh, __FILE__, __LINE__,
546 							"invalid time unit in %%{UNIT}T: %s", format);
547 						mod_accesslog_free_format_fields(parsed_format);
548 						return NULL;
549 					}
550 
551 					if (f->opt & ~(FORMAT_FLAG_TIME_SEC)) srv->srvconf.high_precision_timestamps = 1;
552 				} else if (FORMAT_COOKIE == f->field) {
553 					if (buffer_is_blank(fstr)) f->field = FORMAT_LITERAL; /*(blank)*/
554 				} else if (FORMAT_SERVER_PORT == f->field) {
555 					const char * const ptr = fstr->ptr;
556 					if (buffer_is_blank(fstr))
557 						f->opt |= FORMAT_FLAG_PORT_LOCAL;
558 					else if (0 == strcmp(ptr, "canonical"))
559 						f->opt |= FORMAT_FLAG_PORT_LOCAL;
560 					else if (0 == strcmp(ptr, "local"))
561 						f->opt |= FORMAT_FLAG_PORT_LOCAL;
562 					else if (0 == strcmp(ptr, "remote"))
563 						f->opt |= FORMAT_FLAG_PORT_REMOTE;
564 					else {
565 						log_error(srv->errh, __FILE__, __LINE__,
566 							"invalid format %%{canonical,local,remote}p: %s", format);
567 						mod_accesslog_free_format_fields(parsed_format);
568 						return NULL;
569 					}
570 				} else if (FORMAT_HEADER == f->field
571 				           || FORMAT_RESPONSE_HEADER == f->field) {
572 					f->opt = http_header_hkey_get(BUF_PTR_LEN(fstr));
573 				} else if (FORMAT_REMOTE_HOST == f->field
574 				           || FORMAT_REMOTE_ADDR == f->field) {
575 					f->field = FORMAT_REMOTE_ADDR;
576 					const char * const ptr = fstr->ptr;
577 					if (buffer_is_blank(fstr)) {
578 					}
579 					else if (0 == strcmp(ptr, "mask"))
580 						f->opt = 1; /* mask IP addr lower bits (partial anon) */
581 					else {
582 						log_error(srv->errh, __FILE__, __LINE__,
583 							"invalid format %%{mask}a: %s", format);
584 						mod_accesslog_free_format_fields(parsed_format);
585 						return NULL;
586 					}
587 				} else if (FORMAT_REMOTE_USER == f->field) {
588 					f->field = FORMAT_ENV;
589 					buffer_copy_string_len(fstr, CONST_STR_LEN("REMOTE_USER"));
590 				} else if (FORMAT_NOTE == f->field) {
591 					f->field = FORMAT_ENV;
592 				}
593 			}
594 
595 			return parsed_format;
596 }
597 
TRIGGER_FUNC(log_access_periodic_flush)598 TRIGGER_FUNC(log_access_periodic_flush) {
599     UNUSED(p_d);
600     /* flush buffered access logs every 4 seconds */
601     if (0 == (log_monotonic_secs & 3)) fdlog_files_flush(srv->errh, 0);
602     return HANDLER_GO_ON;
603 }
604 
605 static void
accesslog_append_buffer(buffer * const restrict dest,const buffer * const restrict b,esc_fn_t esc_fn)606 accesslog_append_buffer (buffer * const restrict dest,
607                          const buffer * const restrict b, esc_fn_t esc_fn)
608 {
609     if (!buffer_string_is_empty(b))
610         esc_fn(dest, BUF_PTR_LEN(b));
611     else
612         buffer_append_char(dest, '-');
613 }
614 
615 static void
accesslog_append_bytes(buffer * const dest,off_t bytes,const uint32_t adj)616 accesslog_append_bytes (buffer * const dest, off_t bytes, const uint32_t adj)
617 {
618     if (bytes > 0)
619         buffer_append_int(dest, (bytes -= (off_t)adj) > 0 ? bytes : 0);
620     else
621         buffer_append_char(dest, '-');
622 }
623 
624 __attribute_cold__
625 __attribute_noinline__
626 static void
accesslog_append_cookie(buffer * const restrict dest,const request_st * const restrict r,const buffer * const restrict name,esc_fn_t esc_fn)627 accesslog_append_cookie (buffer * const restrict dest,
628                          const request_st * const restrict r,
629                          const buffer * const restrict name,
630                          esc_fn_t esc_fn)
631 {
632     const buffer * const vb =
633       http_header_request_get(r, HTTP_HEADER_COOKIE, CONST_STR_LEN("Cookie"));
634     if (NULL == vb) return;
635 
636     char *str = vb->ptr;
637     size_t len = buffer_clen(name);
638     do {
639         while (*str == ' ' || *str == '\t') ++str;
640         if (0 == strncmp(str, name->ptr, len) && str[len] == '=') {
641             char *v = str+len+1;
642             for (str = v; *str != '\0' && *str != ';'; ++str) ;
643             if (str == v) break;
644             do { --str; } while (str > v && (*str == ' ' || *str == '\t'));
645             esc_fn(dest, v, str - v + 1);
646             break;
647         }
648         else {
649             while (*str != ';' && *str != ' ' && *str != '\t' && *str != '\0')
650                 ++str;
651         }
652         while (*str == ' ' || *str == '\t') ++str;
653     } while (*str++ == ';');
654 }
655 
656 static int
accesslog_append_time(buffer * const b,const request_st * const r,const format_field * const f,unix_timespec64_t * const ts,format_fields * const parsed_format)657 accesslog_append_time (buffer * const b, const request_st * const r,
658                        const format_field * const f,
659                        unix_timespec64_t * const ts,
660                        format_fields * const parsed_format)
661 {
662 			int flush = 0;
663 			if (f->field == FORMAT_TIMESTAMP) {
664 				if (f->opt & ~(FORMAT_FLAG_TIME_BEGIN|FORMAT_FLAG_TIME_END)) {
665 					if (f->opt & FORMAT_FLAG_TIME_SEC) {
666 						unix_time64_t t = (!(f->opt & FORMAT_FLAG_TIME_BEGIN)) ? log_epoch_secs : r->start_hp.tv_sec;
667 						buffer_append_int(b, (intmax_t)t);
668 					} else if (f->opt & (FORMAT_FLAG_TIME_MSEC|FORMAT_FLAG_TIME_USEC|FORMAT_FLAG_TIME_NSEC)) {
669 						unix_time64_t t;
670 						long ns;
671 						if (!(f->opt & FORMAT_FLAG_TIME_BEGIN)) {
672 							if (0 == ts->tv_sec) log_clock_gettime_realtime(ts);
673 							t = ts->tv_sec;
674 							ns = ts->tv_nsec;
675 						} else {
676 							t = r->start_hp.tv_sec;
677 							ns = r->start_hp.tv_nsec;
678 						}
679 						if (f->opt & FORMAT_FLAG_TIME_MSEC) {
680 							t *= 1000;
681 							t += (ns + 999999) / 1000000; /* ceil */
682 						} else if (f->opt & FORMAT_FLAG_TIME_USEC) {
683 							t *= 1000000;
684 							t += (ns + 999) / 1000; /* ceil */
685 						} else {/*(f->opt & FORMAT_FLAG_TIME_NSEC)*/
686 							t *= 1000000000;
687 							t += ns;
688 						}
689 						buffer_append_int(b, (intmax_t)t);
690 					} else { /*(FORMAT_FLAG_TIME_MSEC_FRAC|FORMAT_FLAG_TIME_USEC_FRAC|FORMAT_FLAG_TIME_NSEC_FRAC)*/
691 						long ns;
692 						char *ptr;
693 						if (!(f->opt & FORMAT_FLAG_TIME_BEGIN)) {
694 							if (0 == ts->tv_sec) log_clock_gettime_realtime(ts);
695 							ns = ts->tv_nsec;
696 						} else {
697 							ns = r->start_hp.tv_nsec;
698 						}
699 						/*assert(t < 1000000000);*/
700 						if (f->opt & FORMAT_FLAG_TIME_MSEC_FRAC) {
701 							ns +=  999999; /* ceil */
702 							ns /= 1000000;
703 							buffer_append_string_len(b, CONST_STR_LEN("000"));
704 						} else if (f->opt & FORMAT_FLAG_TIME_USEC_FRAC) {
705 							ns +=  999; /* ceil */
706 							ns /= 1000;
707 							buffer_append_string_len(b, CONST_STR_LEN("000000"));
708 						} else {/*(f->opt & FORMAT_FLAG_TIME_NSEC_FRAC)*/
709 							buffer_append_string_len(b, CONST_STR_LEN("000000000"));
710 						}
711 						ptr = b->ptr + buffer_clen(b);
712 						for (long x; ns > 0; ns = x)
713 							*--ptr += (ns - (x = ns/10) * 10); /* ns % 10 */
714 					}
715 				} else {
716 					buffer * const ts_accesslog_str = &parsed_format->ts_accesslog_str;
717 					/* cache the generated timestamp (only if ! FORMAT_FLAG_TIME_BEGIN) */
718 					unix_time64_t t;
719 					struct tm tm;
720 
721 					if (!(f->opt & FORMAT_FLAG_TIME_BEGIN)) {
722 						const unix_time64_t cur_ts = log_epoch_secs;
723 						if (parsed_format->last_generated_accesslog_ts == cur_ts) {
724 							buffer_append_string_buffer(b, ts_accesslog_str);
725 							return 0; /* flush == 0 */
726 						}
727 						t = parsed_format->last_generated_accesslog_ts = cur_ts;
728 						flush = 1;
729 					} else {
730 						t = r->start_hp.tv_sec;
731 					}
732 
733 					const char *fmt = buffer_is_blank(&f->string)
734 					  ? NULL
735 					  : f->string.ptr;
736 					buffer_clear(ts_accesslog_str);
737 				      #if defined(HAVE_STRUCT_TM_GMTOFF)
738 					buffer_append_strftime(ts_accesslog_str,
739 					                       fmt ? fmt : "[%d/%b/%Y:%T %z]",
740 					                       localtime64_r(&t, &tm));
741 				      #else /* HAVE_STRUCT_TM_GMTOFF */
742 					buffer_append_strftime(ts_accesslog_str,
743 					                       fmt ? fmt : "[%d/%b/%Y:%T +0000]",
744 					                       gmtime64_r(&t, &tm));
745 				      #endif /* HAVE_STRUCT_TM_GMTOFF */
746 					buffer_append_string_buffer(b, ts_accesslog_str);
747 				}
748 			}
749 			else { /* FORMAT_TIME_USED or FORMAT_TIME_USED_US */
750 				if (f->opt & FORMAT_FLAG_TIME_SEC) {
751 					buffer_append_int(b, log_epoch_secs - r->start_hp.tv_sec);
752 				} else {
753 					const unix_timespec64_t * const bs = &r->start_hp;
754 					off_t tdiff; /*(expected to be 64-bit since large file support enabled)*/
755 					if (0 == ts->tv_sec) log_clock_gettime_realtime(ts);
756 					tdiff = (off_t)(ts->tv_sec - bs->tv_sec)*1000000000 + (ts->tv_nsec - bs->tv_nsec);
757 					if (tdiff <= 0) {
758 						/* sanity check for time moving backwards
759 						 * (daylight savings adjustment or leap seconds or ?) */
760 						tdiff  = -1;
761 					} else if (f->opt & FORMAT_FLAG_TIME_MSEC) {
762 						tdiff +=  999999; /* ceil */
763 						tdiff /= 1000000;
764 					} else if (f->opt & FORMAT_FLAG_TIME_USEC) {
765 						tdiff +=  999; /* ceil */
766 						tdiff /= 1000;
767 					} /* else (f->opt & FORMAT_FLAG_TIME_NSEC) */
768 					buffer_append_int(b, (intmax_t)tdiff);
769 				}
770 			}
771 			return flush;
772 }
773 
774 __attribute_noinline__
775 static void
accesslog_append_remote_addr_masked(buffer * const b,const request_st * const r)776 accesslog_append_remote_addr_masked (buffer * const b, const request_st * const r)
777 {
778     /* mask lower bits of IP address string for logging
779      * (similar to masking policy applied by Google Analytics
780      *  https://support.google.com/analytics/answer/2763052) */
781     /* r->dst_addr_buf is normalized and valid string; operate on known valid */
782     const char * const s = r->dst_addr_buf->ptr;
783     uint32_t i = 0;
784     switch (sock_addr_get_family(r->dst_addr)) {
785      #ifdef HAVE_IPV6
786       case AF_INET6:
787         if (__builtin_expect( (s[0] != ':'), 1)
788             || !IN6_IS_ADDR_V4MAPPED(&((sock_addr*)r->dst_addr)->ipv6.sin6_addr)
789             || NULL == strchr(s, '.')) {
790             /* IPv6: mask final 10 octets (80 bits) of address; keep 6 octets */
791             /* Note: treat string starting w/ "::..." as "::" even if "::x:..."
792              * (rather than special-casing; does not detect "::x:..." besides
793              *  v4mapped "::ffff:1.2.3.4" condition check above) */
794             for (int j=0; s[i] != ':' || ((j+=2) != 6 && s[i+1] != ':'); ++i) ;
795             buffer_append_str2(b, s, i+1, CONST_STR_LEN(":"));
796             break;
797         }
798         /* IN6_IS_ADDR_V4MAPPED() */
799         __attribute_fallthrough__
800      #endif
801       case AF_INET:
802         /* IPv4: mask final octet (8 bits) of address */
803        #ifdef __COVERITY__
804         force_assert(buffer_clen(r->dst_addr_buf) > 2);
805         force_assert(strchr(s, '.') != NULL);
806        #endif
807         for (i = buffer_clen(r->dst_addr_buf)-1; s[--i] != '.'; ) ;
808         buffer_append_str2(b, s, i+1, CONST_STR_LEN("0"));
809         break;
810       default:
811         buffer_append_buffer(b, r->dst_addr_buf);
812         break;
813     }
814 }
815 
816 __attribute_cold__
817 __attribute_noinline__
818 static void
log_access_record_cold(buffer * const b,const request_st * const r,const format_field * const f,esc_fn_t esc_fn)819 log_access_record_cold (buffer * const b, const request_st * const r,
820                         const format_field * const f, esc_fn_t esc_fn)
821 {
822     connection * const con = r->con;
823     switch (f->field) {
824       case FORMAT_SERVER_PORT:
825         if (f->opt & FORMAT_FLAG_PORT_REMOTE) {
826             buffer_append_int(b, sock_addr_get_port(r->dst_addr));
827             break;
828         }
829         /* else if (f->opt & FORMAT_FLAG_PORT_LOCAL) *//*(default)*/
830         __attribute_fallthrough__
831       case FORMAT_LOCAL_ADDR:
832         {
833             const server_socket * const srv_sock = con->srv_socket;
834             const buffer * const srv_token = srv_sock->srv_token;
835             const uint32_t colon = srv_sock->srv_token_colon;
836             if (f->field == FORMAT_LOCAL_ADDR)
837                 /* (perf: not using getsockname() and
838                  *  sock_addr_cache_inet_ntop_copy_buffer())
839                  * (still useful if admin has configured explicit listen IPs) */
840                 buffer_append_string_len(b, srv_token->ptr, colon);
841             else { /* FORMAT_SERVER_PORT */
842                 const uint32_t tlen = buffer_clen(srv_token);
843                 if (colon < tlen) /*(colon != tlen)*/
844                     buffer_append_string_len(b, srv_token->ptr+colon+1,
845                                              tlen - (colon+1));
846             }
847         }
848         break;
849       case FORMAT_KEEPALIVE_COUNT:
850         if (con->request_count > 1)
851             buffer_append_int(b, (intmax_t)(con->request_count-1));
852         else
853             buffer_append_char(b, '0');
854         break;
855       case FORMAT_URL:
856         {
857             const uint32_t len = buffer_clen(&r->target);
858             const char * const qmark = memchr(r->target.ptr, '?', len);
859             esc_fn(b, r->target.ptr,
860                    qmark ? (uint32_t)(qmark - r->target.ptr) : len);
861         }
862         break;
863       case FORMAT_QUERY_STRING:
864         esc_fn(b, BUF_PTR_LEN(&r->uri.query));
865         break;
866       case FORMAT_FILENAME:
867         accesslog_append_buffer(b, &r->physical.path, esc_fn);
868         break;
869       case FORMAT_CONNECTION_STATUS:
870         buffer_append_char(b, (r->state == CON_STATE_RESPONSE_END)
871                               ? r->keep_alive <= 0 ? '-' : '+'
872                               : 'X'); /* CON_STATE_ERROR */
873         break;
874      #if 0 /*(parsed and replaced at startup)*/
875       case FORMAT_REMOTE_IDENT:
876         /* ident */
877         buffer_append_char(b, '-');
878         break;
879      #endif
880      #if 0 /*(parsed and replaced at startup)*/
881       case FORMAT_PERCENT:
882         buffer_append_char(b, '%');
883         break;
884      #endif
885       default:
886         break;
887     }
888 }
889 
log_access_record(const request_st * const r,buffer * const b,format_fields * const parsed_format,esc_fn_t esc)890 static int log_access_record (const request_st * const r, buffer * const b, format_fields * const parsed_format, esc_fn_t esc) {
891 	const buffer *vb;
892 	unix_timespec64_t ts = { 0, 0 };
893 	int flush = 0;
894 
895 	for (const format_field *f = parsed_format->ptr; f->field != FORMAT_UNSET; ++f) {
896 			switch(f->field) {
897 			case FORMAT_LITERAL:
898 				buffer_append_string_buffer(b, &f->string);
899 				break;
900 			case FORMAT_HEADER:
901 				vb = http_header_request_get(r, f->opt, BUF_PTR_LEN(&f->string));
902 				accesslog_append_buffer(b, vb, esc);
903 				break;
904 			case FORMAT_RESPONSE_HEADER:
905 				vb = http_header_response_get(r, f->opt, BUF_PTR_LEN(&f->string));
906 				accesslog_append_buffer(b, vb, esc);
907 				break;
908 		  #if 0 /*(parsed and redirected at startup to FORMAT_ENV "REMOTE_USER")*/
909 			case FORMAT_REMOTE_USER:
910 				vb = http_header_env_get(r, CONST_STR_LEN("REMOTE_USER"));
911 				accesslog_append_buffer(b, vb, esc);
912 				break;
913 		  #endif
914 		  #if 0 /*(parsed and redirected at startup to FORMAT_ENV)*/
915 			/*case FORMAT_NOTE:*/
916 		  #endif
917 			case FORMAT_ENV:
918 				vb = http_header_env_get(r, BUF_PTR_LEN(&f->string));
919 				accesslog_append_buffer(b, vb, esc);
920 				break;
921 			case FORMAT_TIMESTAMP:
922 		  #if 0 /*(parsed and redirected at startup to FORMAT_TIME_USED)*/
923 			case FORMAT_TIME_USED_US:
924 		  #endif
925 			case FORMAT_TIME_USED:
926 				flush |= accesslog_append_time(b, r, f, &ts, parsed_format);
927 				break;
928 		  #if 0 /*(parsed and redirected at startup to FORMAT_REMOTE_ADDR)*/
929 			/*case FORMAT_REMOTE_HOST:*/
930 		  #endif
931 			case FORMAT_REMOTE_ADDR:
932 				if (!f->opt)
933 					buffer_append_string_buffer(b, r->dst_addr_buf);
934 				else
935 					accesslog_append_remote_addr_masked(b, r);
936 				break;
937 			case FORMAT_HTTP_HOST:
938 				accesslog_append_buffer(b, &r->uri.authority, esc);
939 				break;
940 			case FORMAT_REQUEST_LINE:
941 				/*(attempt to reconstruct request line)*/
942 				http_method_append(b, r->http_method);
943 				buffer_append_char(b, ' ');
944 				esc(b, BUF_PTR_LEN(&r->target_orig));
945 				buffer_append_char(b, ' ');
946 				http_version_append(b, r->http_version);
947 				break;
948 			case FORMAT_STATUS:
949 				buffer_append_int(b, r->http_status);
950 				break;
951 			case FORMAT_BYTES_OUT_NO_HEADER:
952 				accesslog_append_bytes(b, http_request_stats_bytes_out(r),
953 				                       r->resp_header_len);
954 				break;
955 			case FORMAT_BYTES_OUT:
956 				accesslog_append_bytes(b, http_request_stats_bytes_out(r), 0);
957 				break;
958 			case FORMAT_BYTES_IN:
959 				accesslog_append_bytes(b, http_request_stats_bytes_in(r), 0);
960 				break;
961 			case FORMAT_SERVER_NAME:
962 				accesslog_append_buffer(b, r->server_name, esc);
963 				break;
964 			case FORMAT_REQUEST_PROTOCOL:
965 				http_version_append(b, r->http_version);
966 				break;
967 			case FORMAT_REQUEST_METHOD:
968 				http_method_append(b, r->http_method);
969 				break;
970 			case FORMAT_COOKIE:
971 				accesslog_append_cookie(b, r, &f->string, esc);
972 				break;
973 			default:
974 				log_access_record_cold(b, r, f, esc);
975 				break;
976 			}
977 	}
978 
979 	return flush;
980 }
981 
REQUESTDONE_FUNC(log_access_write)982 REQUESTDONE_FUNC(log_access_write) {
983     plugin_data * const p = p_d;
984     mod_accesslog_patch_config(r, p);
985     fdlog_st * const fdlog = p->conf.fdlog;
986 
987     /* No output device, nothing to do */
988     if (!p->conf.use_syslog && !fdlog) return HANDLER_GO_ON;
989 
990     buffer * const b = (p->conf.use_syslog || fdlog->mode == FDLOG_PIPE)
991       ? (buffer_clear(r->tmp_buf), r->tmp_buf)
992       : &fdlog->b;
993 
994     esc_fn_t * const esc_fn = !p->conf.escaping
995       ? buffer_append_bs_escaped
996       : buffer_append_bs_escaped_json;
997     const int flush =
998       log_access_record(r, b, p->conf.parsed_format, esc_fn);
999 
1000   #ifdef HAVE_SYSLOG_H
1001     if (p->conf.use_syslog) {
1002         if (!buffer_is_blank(b))
1003             syslog(p->conf.syslog_level, "%s", b->ptr);
1004         return HANDLER_GO_ON;
1005     }
1006   #endif
1007 
1008     buffer_append_char(b, '\n');
1009 
1010     if (flush || fdlog->mode == FDLOG_PIPE || buffer_clen(b) >= 8192) {
1011         const ssize_t wr = write_all(fdlog->fd, BUF_PTR_LEN(b));
1012         buffer_clear(b); /*(clear buffer, even on error)*/
1013         if (-1 == wr)
1014             log_perror(r->conf.errh, __FILE__, __LINE__,
1015               "error flushing log %s", fdlog->fn);
1016     }
1017 
1018     return HANDLER_GO_ON;
1019 }
1020 
1021 
1022 __attribute_cold__
1023 int mod_accesslog_plugin_init(plugin *p);
mod_accesslog_plugin_init(plugin * p)1024 int mod_accesslog_plugin_init(plugin *p) {
1025 	p->version     = LIGHTTPD_VERSION_ID;
1026 	p->name        = "accesslog";
1027 
1028 	p->init        = mod_accesslog_init;
1029 	p->set_defaults= mod_accesslog_set_defaults;
1030 	p->cleanup     = mod_accesslog_free;
1031 
1032 	p->handle_request_done  = log_access_write;
1033 	p->handle_trigger       = log_access_periodic_flush;
1034 
1035 	return 0;
1036 }
1037