xref: /lighttpd1.4/src/connections.c (revision 3a8fc4bc)
1 #include "first.h"
2 
3 #include "base.h"
4 #include "buffer.h"
5 #include "burl.h"       /* HTTP_PARSEOPT_HEADER_STRICT */
6 #include "chunk.h"
7 #include "log.h"
8 #include "connections.h"
9 #include "fdevent.h"
10 #include "h2.h"
11 #include "http_header.h"
12 
13 #include "reqpool.h"
14 #include "request.h"
15 #include "response.h"
16 #include "network.h"
17 #include "stat_cache.h"
18 
19 #include "plugin.h"
20 #include "plugins.h"
21 
22 #include "sock_addr_cache.h"
23 
24 #include <sys/stat.h>
25 
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <errno.h>
29 #include <string.h>
30 
31 #include "sys-socket.h"
32 
/* Number of seconds a connection may stay in the lingering-close state
 * (compared against log_monotonic_secs - con->close_timeout_ts) before
 * the socket is actually closed. */
#define HTTP_LINGER_TIMEOUT 5

/* Store request state n into request r; evaluates to the assigned value. */
#define connection_set_state(r, n) ((r)->state = (n))
36 
37 __attribute_cold__
connection_set_state_error(request_st * const r,const request_state_t state)38 static void connection_set_state_error(request_st * const r, const request_state_t state) {
39     connection_set_state(r, state);
40 }
41 
/* Forward declarations: connections_get_new_connection() calls both of
 * these before their definitions appear further down in this file. */
__attribute_cold__
static connection *connection_init(server *srv);

static void connection_reset(connection *con);
46 
connections_get_new_connection(server * srv)47 static connection *connections_get_new_connection(server *srv) {
48     connection *con;
49     --srv->lim_conns;
50     if (srv->conns_pool) {
51         con = srv->conns_pool;
52         srv->conns_pool = con->next;
53     }
54     else {
55         con = connection_init(srv);
56         connection_reset(con);
57     }
58     /*con->prev = NULL;*//*(already set)*/
59     if ((con->next = srv->conns))
60         con->next->prev = con;
61     return (srv->conns = con);
62 }
63 
/* Unlink con from the active list (srv->conns), push it onto the free pool
 * (srv->conns_pool), and give its slot back by incrementing srv->lim_conns. */
static void connection_del(server *srv, connection *con) {
    connection * const after  = con->next;
    connection * const before = con->prev;

    if (after)
        after->prev = before;

    if (before)
        before->next = after;
    else
        srv->conns = after; /* con was the list head */

    con->prev = NULL;
    con->next = srv->conns_pool;
    srv->conns_pool = con;

    ++srv->lim_conns;
}
76 
/* Tear down a connection: notify plugins, reset request state, unregister
 * the fd from the event system, close the socket, and return the connection
 * object to the pool via connection_del(). */
static void connection_close(connection *con) {
    /* a negative fd is converted back to its positive value before use */
    if (con->fd < 0)
        con->fd = -con->fd;

    plugins_call_handle_connection_close(con);

    server * const srv = con->srv;
    request_st * const r = &con->request;

    request_reset_ex(r); /* r->conf.* is still valid for the logging below */
    connection_set_state(r, CON_STATE_CONNECT);

    chunkqueue_reset(con->read_queue);
    con->request_count = 0;
    con->is_ssl_sock = 0;
    con->revents_err = 0;

    fdevent_fdnode_event_del(srv->ev, con->fdn);
    fdevent_unregister(srv->ev, con->fdn);
    con->fdn = NULL;

  #ifdef __WIN32
    const int close_rc = closesocket(con->fd);
  #else
    const int close_rc = close(con->fd);
  #endif
    if (0 != close_rc) {
        log_perror(r->conf.errh, __FILE__, __LINE__,
          "(warning) close: %d", con->fd);
    }
    else {
        --srv->cur_fds;
    }

    if (r->conf.log_state_handling)
        log_error(r->conf.errh, __FILE__, __LINE__,
          "connection closed for fd %d", con->fd);

    con->fd = -1;

    connection_del(srv, con);
}
113 
/* Drain (and discard) whatever the peer has sent on a cleartext socket.
 *
 * This lingering read is done regardless of keep-alive: a socket may still
 * hold unread data, and closing before reading it can prevent the client
 * from seeing all of our output.
 *
 * If the read would block, simply return and wait for more.  On EOF or a
 * non-recoverable error, backdate con->close_timeout_ts so that the linger
 * timeout is considered already expired. */
static void connection_read_for_eos_plain(connection * const con) {
    char scratch[16384];
    const int family = sock_addr_get_family(&con->dst_addr);
    ssize_t nread;

    for (;;) {
        nread = fdevent_socket_read_discard(con->fd, scratch, sizeof(scratch),
                                            family, SOCK_STREAM);
        if (nread > 0)
            continue;                 /* more data may be pending */
        if (nread < 0 && errno == EINTR)
            continue;                 /* interrupted; retry */
        break;
    }

    if (nread < 0) {
        if (errno == EAGAIN)
            return;
      #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
        if (errno == EWOULDBLOCK)
            return;
      #endif
    }

    /* 0 == nread (EOF), or nread < 0 with a non-recoverable errno */
    con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
}
136 
/* Lingering read for TLS sockets: read through con->network_read() and throw
 * the data away.  A negative result backdates con->close_timeout_ts so the
 * linger timeout is treated as expired. */
static void connection_read_for_eos_ssl(connection * const con) {
    chunkqueue * const rq = con->read_queue;

    if (con->network_read(con, rq, MAX_READ_LIMIT) < 0) {
        con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
    }

    chunkqueue_reset(con->read_queue); /* discard anything that was read */
}
142 
/* Dispatch the lingering read to the TLS or cleartext implementation. */
static void connection_read_for_eos(connection * const con) {
    if (con->is_ssl_sock)
        connection_read_for_eos_ssl(con);
    else
        connection_read_for_eos_plain(con);
}
148 
/* Service a connection that is lingering before close: drain pending input,
 * then close once more than HTTP_LINGER_TIMEOUT seconds have elapsed since
 * con->close_timeout_ts. */
static void connection_handle_close_state(connection *con) {
    connection_read_for_eos(con);

    if (log_monotonic_secs - con->close_timeout_ts <= HTTP_LINGER_TIMEOUT)
        return; /* still within the linger window */

    connection_close(con);
}
156 
/* Begin closing a connection.
 *
 * Plugins are notified and the connection is reset.  If the fd is valid and
 * either the socket is TLS or shutdown(SHUT_WR) succeeds, the request enters
 * CON_STATE_CLOSE and the linger timer starts; otherwise the connection is
 * closed immediately. */
static void connection_handle_shutdown(connection *con) {
    plugins_call_handle_connection_shut_wr(con);

    connection_reset(con);

    /* (shutdown() is only attempted on non-TLS sockets with a valid fd) */
    if (con->fd < 0
        || (!con->is_ssl_sock && 0 != shutdown(con->fd, SHUT_WR))) {
        connection_close(con);
        return;
    }

    con->close_timeout_ts = log_monotonic_secs;

    request_st * const r = &con->request;
    connection_set_state(r, CON_STATE_CLOSE);
    if (r->conf.log_state_handling)
        log_error(r->conf.errh, __FILE__, __LINE__,
          "shutdown for fd %d", con->fd);
}
177 
178 
/* Finish a response: run the request_done hook, then either recycle the
 * request for the next keep-alive request or start connection shutdown. */
static void connection_handle_response_end_state(request_st * const r, connection * const con) {
    if (r->http_version > HTTP_VERSION_1_1) {
        h2_retire_con(r, con);
        r->keep_alive = 0;
        /* set a status so that mod_accesslog, mod_rrdtool hooks are called
         * in plugins_call_handle_request_done() (XXX: or set to 0 to omit) */
        r->http_status = 100; /* XXX: what if con->state == CON_STATE_ERROR? */
    }

    /* the request_done hook runs whenever http_status is set (e.g. to log
     * the request), even on error or a dropped connection */
    if (r->http_status)
        plugins_call_handle_request_done(r);

    const int body_incomplete =
      (r->reqbody_length != r->reqbody_queue.bytes_in);
    if (body_incomplete || r->state == CON_STATE_ERROR) {
        /* request body may not have been read completely */
        r->keep_alive = 0;
        /* clean up failed partial write of 1xx intermediate responses
         * (for HTTP/1.1) */
        if (con->write_queue != &r->write_queue) {
            chunkqueue_free(con->write_queue);
            con->write_queue = &r->write_queue;
        }
    }

    if (r->keep_alive <= 0) {
        connection_handle_shutdown(con);
        return;
    }

    request_reset(r);
    con->is_readable = 1; /* potentially trigger optimistic read */
    /* checkpoints: accounting used by mod_accesslog for HTTP/1.0 and
     * HTTP/1.1, and overloaded to detect the next bytes received on a
     * keep-alive connection */
    r->bytes_read_ckpt = r->read_queue.bytes_in;
    r->bytes_written_ckpt = r->write_queue.bytes_out;
  #if 0
    r->start_hp.tv_sec = log_epoch_secs;
    con->read_idle_ts = log_monotonic_secs;
  #endif
    connection_set_state(r, CON_STATE_REQUEST_START);
}
219 
220 
221 __attribute_pure__
222 static off_t
connection_write_throttled(const connection * const con,off_t max_bytes)223 connection_write_throttled (const connection * const con, off_t max_bytes)
224 {
225     const request_config * const restrict rconf = &con->request.conf;
226     if (0 == rconf->global_bytes_per_second && 0 == rconf->bytes_per_second)
227         return max_bytes;
228 
229     if (rconf->global_bytes_per_second) {
230         off_t limit = (off_t)rconf->global_bytes_per_second
231                     - *(rconf->global_bytes_per_second_cnt_ptr);
232         if (max_bytes > limit)
233             max_bytes = limit;
234     }
235 
236     if (rconf->bytes_per_second) {
237         off_t limit = (off_t)rconf->bytes_per_second
238                     - con->bytes_written_cur_second;
239         if (max_bytes > limit)
240             max_bytes = limit;
241     }
242 
243     return max_bytes > 0 ? max_bytes : 0; /*(0 == reached traffic limit)*/
244 }
245 
246 
247 static off_t
connection_write_throttle(connection * const con,off_t max_bytes)248 connection_write_throttle (connection * const con, off_t max_bytes)
249 {
250     /*assert(max_bytes > 0);*/
251     max_bytes = connection_write_throttled(con, max_bytes);
252     if (0 == max_bytes) con->traffic_limit_reached = 1;
253     return max_bytes;
254 }
255 
256 
257 static int
connection_write_chunkqueue(connection * const con,chunkqueue * const restrict cq,off_t max_bytes)258 connection_write_chunkqueue (connection * const con, chunkqueue * const restrict cq, off_t max_bytes)
259 {
260     /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */
261 
262     con->write_request_ts = log_monotonic_secs;
263 
264     max_bytes = connection_write_throttle(con, max_bytes);
265     if (0 == max_bytes) return 1;
266 
267     off_t written = cq->bytes_out;
268     int ret;
269 
270   #ifdef TCP_CORK
271     int corked = 0;
272   #endif
273 
274     /* walk chunkqueue up to first FILE_CHUNK (if present)
275      * This may incur memory load misses for pointer chasing, but effectively
276      * preloads part of the chunkqueue, something which used to be a side effect
277      * of a previous (less efficient) version of chunkqueue_length() which
278      * walked the entire chunkqueue (on each and every call).  The loads here
279      * make a measurable difference in performance in underlying call to
280      * con->network_write() */
281     if (cq->first->next && cq->first->type == MEM_CHUNK) {
282         const chunk *c = cq->first;
283         do { c = c->next; } while (c && c->type == MEM_CHUNK);
284       #ifdef TCP_CORK
285         /* Linux: put a cork into socket as we want to combine write() calls
286          * but only if we really have multiple chunks including non-MEM_CHUNK
287          * (or if multiple chunks and TLS), and only if TCP socket */
288         /* (max_bytes may have been reduced by connection_write_throttle(),
289          *  but not bothering to check; might result in some extra corking) */
290         if (NULL != c || (con->is_ssl_sock && chunkqueue_length(cq) > 16384)) {
291             const int sa_family = sock_addr_get_family(&con->srv_socket->addr);
292             if (sa_family == AF_INET || sa_family == AF_INET6) {
293                 corked = 1;
294                 (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
295                                  &corked, sizeof(corked));
296             }
297         }
298       #endif
299     }
300 
301     ret = con->network_write(con, cq, max_bytes);
302     if (ret >= 0) {
303         ret = chunkqueue_is_empty(cq) ? 0 : 1;
304     }
305 
306   #ifdef TCP_CORK
307     if (corked) {
308         corked = 0;
309         (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
310                          &corked, sizeof(corked));
311     }
312   #endif
313 
314     written = cq->bytes_out - written;
315     con->bytes_written_cur_second += written;
316     request_st * const r = &con->request;
317     if (r->conf.global_bytes_per_second_cnt_ptr)
318         *(r->conf.global_bytes_per_second_cnt_ptr) += written;
319 
320     return ret;
321 }
322 
323 
324 static int
connection_write_1xx_info(request_st * const r,connection * const con)325 connection_write_1xx_info (request_st * const r, connection * const con)
326 {
327     /* (Note: prior 1xx intermediate responses may be present in cq) */
328     /* (Note: also choosing not to update con->write_request_ts
329      *  which differs from connection_write_chunkqueue()) */
330     chunkqueue * const cq = con->write_queue;
331     off_t written = cq->bytes_out;
332 
333     int rc = con->network_write(con, cq, MAX_WRITE_LIMIT);
334 
335     written = cq->bytes_out - written;
336     con->bytes_written_cur_second += written;
337     if (r->conf.global_bytes_per_second_cnt_ptr)
338         *(r->conf.global_bytes_per_second_cnt_ptr) += written;
339 
340     if (rc < 0) {
341         connection_set_state_error(r, CON_STATE_ERROR);
342         return 0; /* error */
343     }
344 
345     if (!chunkqueue_is_empty(cq)) { /* partial write (unlikely) */
346         con->is_writable = 0;
347         if (cq == &r->write_queue) {
348             /* save partial write of 1xx in separate chunkqueue
349              * Note: sending of remainder of 1xx might be delayed
350              * until next set of response headers are sent */
351             con->write_queue = chunkqueue_init(NULL);
352             /* (copy bytes for accounting purposes in event of failure) */
353             con->write_queue->bytes_in = cq->bytes_out; /*(yes, bytes_out)*/
354             con->write_queue->bytes_out = cq->bytes_out;
355             chunkqueue_append_chunkqueue(con->write_queue, cq);
356         }
357     }
358 
359   #if 0
360     /* XXX: accounting inconsistency
361      * 1xx is not currently included in r->resp_header_len,
362      * so mod_accesslog reporting of %b or %B (FORMAT_BYTES_OUT_NO_HEADER)
363      * reports all bytes out minus len of final response headers,
364      * but including 1xx intermediate responses.  If 1xx intermediate
365      * responses were included in r->resp_header_len, then there are a
366      * few places in the code which must be adjusted to use r->resp_header_done
367      * instead of (0 == r->resp_header_len) as flag that final response was set
368      * (Doing the following would "discard" the 1xx len from bytes_out)
369      */
370     r->write_queue.bytes_in = r->write_queue.bytes_out = 0;
371   #endif
372 
373     return 1; /* success */
374 }
375 
376 
377 int
connection_send_1xx(request_st * const r,connection * const con)378 connection_send_1xx (request_st * const r, connection * const con)
379 {
380     /* Make best effort to send HTTP/1.1 1xx intermediate */
381     /* (Note: if other modules set response headers *before* the
382      *  handle_response_start hook, and the backends subsequently sends 1xx,
383      *  then the response headers are sent here with 1xx and might be cleared
384      *  by caller (http_response_parse_headers() and http_response_check_1xx()),
385      *  instead of being sent with the final response.
386      *  (e.g. mod_magnet setting response headers, then backend sending 103)) */
387 
388     chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
389 
390     buffer * const b = chunkqueue_append_buffer_open(cq);
391     buffer_copy_string_len(b, CONST_STR_LEN("HTTP/1.1 "));
392     http_status_append(b, r->http_status);
393     for (uint32_t i = 0; i < r->resp_headers.used; ++i) {
394         const data_string * const ds = (data_string *)r->resp_headers.data[i];
395         const uint32_t klen = buffer_clen(&ds->key);
396         const uint32_t vlen = buffer_clen(&ds->value);
397         if (0 == klen || 0 == vlen) continue;
398         buffer_append_str2(b, CONST_STR_LEN("\r\n"), ds->key.ptr, klen);
399         buffer_append_str2(b, CONST_STR_LEN(": "), ds->value.ptr, vlen);
400     }
401     buffer_append_string_len(b, CONST_STR_LEN("\r\n\r\n"));
402     chunkqueue_append_buffer_commit(cq);
403 
404     if (con->traffic_limit_reached)
405         return 1; /* success; send later if throttled */
406 
407     return connection_write_1xx_info(r, con);
408 }
409 
410 
411 static int
connection_write_100_continue(request_st * const r,connection * const con)412 connection_write_100_continue (request_st * const r, connection * const con)
413 {
414     /* Make best effort to send "HTTP/1.1 100 Continue" */
415     static const char http_100_continue[] = "HTTP/1.1 100 Continue\r\n\r\n";
416 
417     if (con->traffic_limit_reached)
418         return 1; /* success; skip sending if throttled */
419 
420     chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
421     chunkqueue_append_mem(cq, http_100_continue, sizeof(http_100_continue)-1);
422     return connection_write_1xx_info(r, con);
423 }
424 
425 
/* Write pending data from con->write_queue and report the resulting state.
 *
 * Callers must ensure the queue is not empty.
 * Returns CON_STATE_RESPONSE_END when everything was written and the response
 * body is finished, CON_STATE_ERROR on write failure or remote close, and
 * CON_STATE_WRITE otherwise (state did not change). */
static int connection_handle_write(request_st * const r, connection * const con) {
    /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */

    if (con->is_writable <= 0)
        return CON_STATE_WRITE;

    const int rc =
      connection_write_chunkqueue(con, con->write_queue, MAX_WRITE_LIMIT);

    if (0 == rc) {
        /* queue fully written */
        if (r->resp_body_finished) {
            connection_set_state(r, CON_STATE_RESPONSE_END);
            return CON_STATE_RESPONSE_END;
        }
    }
    else if (1 == rc) {
        /* not finished yet -> WRITE */
        /* do not spin trying to send HTTP/2 server Connection Preface
         * while waiting for TLS negotiation to complete */
        if (con->write_queue->bytes_out)
            con->is_writable = 0;
    }
    else if (-1 == rc || -2 == rc) {
        if (-1 == rc) { /* error on our side (-2 is remote close; not logged) */
            log_error(r->conf.errh, __FILE__, __LINE__,
              "connection closed: write failed on fd %d", con->fd);
        }
        connection_set_state_error(r, CON_STATE_ERROR);
        return CON_STATE_ERROR;
    }

    return CON_STATE_WRITE; /*(state did not change)*/
}
457 
/* Drive the write state for a request: alternate between flushing
 * r->write_queue to the client and asking the handler module for more
 * response data, for as long as progress can be made.
 *
 * Returns CON_STATE_RESPONSE_END or CON_STATE_ERROR when the state changed,
 * otherwise CON_STATE_WRITE. */
static int connection_handle_write_state(request_st * const r, connection * const con) {
    for (;;) {
        if (chunkqueue_is_empty(&r->write_queue)) {
            /* nothing queued; done if the body is complete */
            if (r->resp_body_finished) {
                connection_set_state(r, CON_STATE_RESPONSE_END);
                return CON_STATE_RESPONSE_END;
            }
        }
        else if (r->http_version <= HTTP_VERSION_1_1) {
            /* only try to write if we have something in the queue */
            const int rc = connection_handle_write(r, con);
            if (rc != CON_STATE_WRITE)
                return rc;
        }

        if (r->handler_module && !r->resp_body_finished) {
            const plugin * const p = r->handler_module;
            const int rc = p->handle_subrequest(r, p->data);
            const int acceptable = (rc == HANDLER_WAIT_FOR_EVENT
                                    || rc == HANDLER_FINISHED
                                    || rc == HANDLER_GO_ON);
            if (!acceptable) {
                /* HANDLER_ERROR is expected and not logged; anything else
                 * (including HANDLER_COMEBACK) is unexpected here */
                if (rc != HANDLER_ERROR) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                      "unexpected subrequest handler ret-value: %d %d",
                      con->fd, rc);
                }
                connection_set_state_error(r, CON_STATE_ERROR);
                return CON_STATE_ERROR;
            }
        }

        /* decide whether another pass can make progress */
        if (r->http_version > HTTP_VERSION_1_1)
            break;
        if (chunkqueue_is_empty(&r->write_queue)) {
            if (!r->resp_body_finished)
                break;
        }
        else if (con->is_writable <= 0 || 0 != con->traffic_limit_reached) {
            break;
        }
    }

    return CON_STATE_WRITE;
}
497 
498 
499 __attribute_cold__
connection_init(server * srv)500 static connection *connection_init(server *srv) {
501 	connection * const con = ck_calloc(1, sizeof(*con));
502 
503 	con->srv = srv;
504 	con->plugin_slots = srv->plugin_slots;
505 	con->config_data_base = srv->config_data_base;
506 
507 	request_st * const r = &con->request;
508 	request_init_data(r, con, srv);
509 	con->write_queue = &r->write_queue;
510 	con->read_queue = &r->read_queue;
511 
512 	/* init plugin-specific per-connection structures */
513 	con->plugin_ctx = ck_calloc(srv->plugins.used + 1, sizeof(void *));
514 
515 	return con;
516 }
517 
518 
/* Release a connection object and everything it owns.  Queues are freed
 * separately only when they are not the ones embedded in the request. */
static void connection_free(connection * const con) {
    request_st * const r = &con->request;

    connection_reset(con);

    if (&r->write_queue != con->write_queue)
        chunkqueue_free(con->write_queue);
    if (&r->read_queue != con->read_queue)
        chunkqueue_free(con->read_queue);

    request_free_data(r);

    free(con->dst_addr_buf.ptr);
    free(con->plugin_ctx);
    free(con);
}
533 
/* Free every connection currently held in the free pool (srv->conns_pool). */
void connections_pool_clear(server * const srv) {
    for (connection *con = srv->conns_pool; NULL != con; con = srv->conns_pool) {
        srv->conns_pool = con->next; /* unlink before freeing */
        connection_free(con);
    }
}
541 
/* Free all connection objects: first the free pool, then the active list
 * (srv->conns). */
void connections_free(server *srv) {
    connections_pool_clear(srv);

    for (connection *con = srv->conns; NULL != con; con = srv->conns) {
        srv->conns = con->next; /* unlink before freeing */
        connection_free(con);
    }
}
551 
552 
/* Reset the connection's request and per-connection counters, and mark the
 * connection readable. */
static void connection_reset(connection *con) {
    request_st * const r = &con->request;

    request_reset(r);

    r->bytes_written_ckpt = 0;
    r->bytes_read_ckpt = 0;

    con->bytes_written_cur_second = 0;
    con->is_readable = 1;
}
561 
562 
563 __attribute_cold__
564 static chunk *
connection_discard_blank_line(chunkqueue * const cq,uint32_t header_len)565 connection_discard_blank_line (chunkqueue * const cq, uint32_t header_len)
566 {
567     /*(separate func only to be able to mark with compiler hint as cold)*/
568     chunkqueue_mark_written(cq, header_len);
569     return cq->first; /* refresh c after chunkqueue_mark_written() */
570 }
571 
572 
/* Try to obtain more request-header data.
 *
 * Reads from the network when c is NULL or is the last chunk and the
 * connection is readable, then compacts the head of cq when it spans several
 * chunks.  olen is the number of bytes of the first chunk already examined.
 *
 * Returns the first chunk of cq if it now holds more than olen bytes past its
 * offset; otherwise NULL.  Also returns NULL when the connection switched to
 * HTTP/2 during the read. */
static chunk * connection_read_header_more(connection *con, chunkqueue *cq, chunk *c, const size_t olen) {
    /*(should not be reached by HTTP/2 streams)*/
    /*if (r->http_version == HTTP_VERSION_2) return NULL;*/
    /*(However, new connections over TLS may become HTTP/2 connections via ALPN
     * and return from this routine with r->http_version == HTTP_VERSION_2) */

    const int at_last_chunk = (NULL == c || NULL == c->next);
    if (at_last_chunk && con->is_readable > 0) {
        request_st * const r = &con->request;
        con->read_idle_ts = log_monotonic_secs;
        if (0 != con->network_read(con, cq, MAX_READ_LIMIT))
            connection_set_state_error(r, CON_STATE_ERROR);
        /* check if switched to HTTP/2 (ALPN "h2" during TLS negotiation) */
        if (r->http_version == HTTP_VERSION_2)
            return NULL;
    }

    if (0 != olen && cq->first != cq->last) {
        /* merge leading chunks: round olen up to a multiple of 16k, adding
         * another 16k when the rounding leaves 1k or less of headroom */
        const size_t queued = chunkqueue_length(cq);
        size_t want = (olen + (16384-1)) & ~(16384-1);
        if (want - olen <= 1024)
            want += 16384;
        chunkqueue_compact_mem(cq, want > queued ? queued : want);
    }

    /* detect if data is added to chunk */
    chunk * const head = cq->first;
    if (NULL == head)
        return NULL;
    return ((size_t)head->offset + olen < buffer_clen(head->mem))
      ? head
      : NULL;
}
603 
604 
605 __attribute_cold__
606 static void
connection_transition_h2(request_st * const h2r,connection * const con)607 connection_transition_h2 (request_st * const h2r, connection * const con)
608 {
609     buffer_copy_string_len(&h2r->target,      CONST_STR_LEN("*"));
610     buffer_copy_string_len(&h2r->target_orig, CONST_STR_LEN("*"));
611     buffer_copy_string_len(&h2r->uri.path,    CONST_STR_LEN("*"));
612     h2r->http_method = HTTP_METHOD_PRI;
613     h2r->reqbody_length = -1; /*(unnecessary for h2r?)*/
614     h2r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
615 
616     /* (h2r->state == CON_STATE_READ) for transition by ALPN
617      *   or starting cleartext HTTP/2 with Prior Knowledge
618      *   (e.g. via HTTP Alternative Services)
619      * (h2r->state == CON_STATE_REQUEST_END) for Upgrade: h2c */
620 
621     if (h2r->state != CON_STATE_ERROR)
622         connection_set_state(h2r, CON_STATE_WRITE);
623 
624   #if 0 /* ... if it turns out we need a separate fdevent handler for HTTP/2 */
625     con->fdn->handler = connection_handle_fdevent_h2;
626   #endif
627 
628     if (NULL == con->h2) /*(not yet transitioned to HTTP/2; not Upgrade: h2c)*/
629         h2_init_con(h2r, con, NULL);
630 }
631 
632 
/**
 * handle request header read
 *
 * we get called by the state-engine and by the fdevent-handler
 *
 * Scans the head of con->read_queue for a complete set of request headers,
 * reading more data from the network as needed, and hands complete headers
 * to http_request_headers_process().
 *
 * returns 1 when request headers were processed, or when an error status
 *           (431 or 400) was set on the request;
 * returns 0 when the headers are still incomplete, when the cleartext HTTP/2
 *           connection preface was detected (r->http_version is set to
 *           HTTP_VERSION_2), or when an "Upgrade: h2c" request was accepted
 */
__attribute_noinline__
static int connection_handle_read_state(connection * const con)  {
    /*(should not be reached by HTTP/2 streams)*/
    chunkqueue * const cq = con->read_queue;
    chunk *c = cq->first;
    uint32_t clen = 0;
    uint32_t header_len = 0;
    request_st * const r = &con->request;
    uint8_t keepalive_request_start = 0;
    uint8_t pipelined_request_start = 0;
    uint8_t discard_blank = 0;
    unsigned short hoff[8192]; /* max num header lines + 3; 16k on stack */

    /* not the first request on this connection (keep-alive) */
    if (con->request_count > 1) {
        discard_blank = 1;
        /* no bytes received since the checkpoint taken at end of prior request */
        if (cq->bytes_in == r->bytes_read_ckpt) {
            keepalive_request_start = 1;
            if (NULL != c) { /* !chunkqueue_is_empty(cq)) */
                pipelined_request_start = 1;
                /* partial header of next request has already been read,
                 * so optimistically check for more data received on
                 * socket while processing the previous request */
                con->is_readable = 1;
                /*(if partially read next request and unable to read any bytes,
                 * then will unnecessarily scan again before subsequent read)*/
            }
        }
    }

    /* Each pass parses the first chunk.  'continue' jumps to the loop
     * condition, which calls connection_read_header_more() to fetch more
     * data; the loop ends when that returns NULL or via 'break' below. */
    do {
        if (NULL == c) continue;
        clen = buffer_clen(c->mem) - c->offset;
        if (0 == clen) continue;
        if (__builtin_expect( (c->offset > USHRT_MAX), 0)) /*(highly unlikely)*/
            chunkqueue_compact_mem_offset(cq);

        hoff[0] = 1;                         /* number of lines */
        hoff[1] = (unsigned short)c->offset; /* base offset for all lines */
        /*hoff[2] = ...;*/                   /* offset from base for 2nd line */

        /* header_len is 0 while the headers are incomplete (see uses below) */
        header_len = http_header_parse_hoff(c->mem->ptr + c->offset,clen,hoff);

        /* casting to (unsigned short) might truncate, and the hoff[]
         * addition might overflow, but max_request_field_size is USHRT_MAX,
         * so failure will be detected below */
        const uint32_t max_request_field_size = r->conf.max_request_field_size;
        if ((header_len ? header_len : clen) > max_request_field_size
            || hoff[0] >= sizeof(hoff)/sizeof(hoff[0])-1) {
            log_error(r->conf.errh, __FILE__, __LINE__, "%s",
                      "oversized request-header -> sending Status 431");
            r->http_status = 431; /* Request Header Fields Too Large */
            r->keep_alive = 0;
            return 1;
        }

        if (__builtin_expect( (0 != header_len), 1)) {
            if (__builtin_expect( (hoff[0] > 1), 1))
                break; /* common case; request headers complete */

            /* here hoff[0] == 1: the parsed block is a single line */
            if (discard_blank) { /* skip one blank line e.g. following POST */
                if (header_len == clen) continue;
                const int ch = c->mem->ptr[c->offset+header_len];
                if (ch != '\r' && ch != '\n') {
                    /* discard prior blank line if next line is not blank */
                    discard_blank = 0;
                    clen = 0;/*(for connection_read_header_more() to return c)*/
                    c = connection_discard_blank_line(cq, header_len);/*cold*/
                    continue;
                } /*(else fall through to error out in next block)*/
            }
        }

        /* reject input whose first byte is a control character */
        if (((unsigned char *)c->mem->ptr)[c->offset] < 32) {
            /* expecting ASCII method beginning with alpha char
             * or HTTP/2 pseudo-header beginning with ':' */
            /*(TLS handshake begins with SYN 0x16 (decimal 22))*/
            log_error(r->conf.errh, __FILE__, __LINE__, "%s (%s)",
                      c->mem->ptr[c->offset] == 0x16
                      ? "unexpected TLS ClientHello on clear port"
                      : "invalid request-line -> sending Status 400",
                      con->dst_addr_buf.ptr);
            r->http_status = 400; /* Bad Request */
            r->keep_alive = 0;
            return 1;
        }
    } while ((c = connection_read_header_more(con, cq, c, clen)));

    if (keepalive_request_start) {
        if (cq->bytes_in > r->bytes_read_ckpt) {
            /* update r->start_hp.tv_sec timestamp when first byte of
             * next request is received on a keep-alive connection */
            r->start_hp.tv_sec = log_epoch_secs;
            if (r->conf.high_precision_timestamps)
                log_clock_gettime_realtime(&r->start_hp);
        }
        if (pipelined_request_start && c)
            con->read_idle_ts = log_monotonic_secs;
    }

    if (NULL == c) return 0; /* incomplete request headers */

  #ifdef __COVERITY__
    if (buffer_clen(c->mem) < hoff[1]) {
        return 1;
    }
  #endif

    /* start of the header block within the chunk (hoff[1] is base offset) */
    char * const hdrs = c->mem->ptr + hoff[1];

    if (con->request_count > 1) {
        /* adjust r->bytes_read_ckpt for http_request_stats_bytes_in()
         * (headers_len is still in cq; marked written, bytes_out incr below) */
        r->bytes_read_ckpt = cq->bytes_out;
        /* clear buffers which may have been kept for reporting on keep-alive,
         * (e.g. mod_status) */
        request_reset_ex(r);
    }
    /* RFC7540 3.5 HTTP/2 Connection Preface
     * "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
     * (Connection Preface MUST be exact match)
     * If ALT-SVC used to advertise HTTP/2, then client might start
     * http connection (not TLS) sending HTTP/2 connection preface.
     * (note: intentionally checking only on initial request) */
    else if (!con->is_ssl_sock && r->conf.h2proto
             && hoff[0] == 2 && hoff[2] == 16
             && hdrs[0]=='P' && hdrs[1]=='R' && hdrs[2]=='I' && hdrs[3]==' ') {
        r->http_version = HTTP_VERSION_2;
        return 0;
    }

    r->rqst_header_len = header_len;
    if (r->conf.log_request_header)
        log_error_multiline(r->conf.errh, __FILE__, __LINE__,
                            hdrs, header_len, "fd:%d rqst: ", con->fd);
    http_request_headers_process(r, hdrs, hoff, con->proto_default_port);
    /* headers have been consumed; advance the read queue past them */
    chunkqueue_mark_written(cq, r->rqst_header_len);

    if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)
        && 0 == r->http_status
        && h2_check_con_upgrade_h2c(r)) {
        /*(Upgrade: h2c over cleartext does not have SNI; no COMP_HTTP_HOST)*/
        r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
                                | (1 << COMP_HTTP_REMOTE_IP);
        r->bytes_read_ckpt = 0;
        /*connection_handle_write(r, con);*//* defer write to network */
        return 0;
    }

    return 1;
}
788 
789 
connection_handle_fdevent(void * const context,const int revents)790 static handler_t connection_handle_fdevent(void * const context, const int revents) {
791     connection * restrict con = context;
792     const int is_ssl_sock = con->is_ssl_sock;
793 
794     joblist_append(con);
795 
796     if (revents & ~(FDEVENT_IN | FDEVENT_OUT))
797         con->revents_err |= (revents & ~(FDEVENT_IN | FDEVENT_OUT));
798 
799     if (revents & (FDEVENT_IN | FDEVENT_OUT)) {
800         if (is_ssl_sock) /*(ssl may read and write for both reads and writes)*/
801             con->is_readable = con->is_writable = 1;
802         else {
803             if (revents & FDEVENT_IN)
804                 con->is_readable = 1;
805             if (revents & FDEVENT_OUT)
806                 con->is_writable = 1;
807         }
808     }
809 
810     return HANDLER_FINISHED;
811 }
812 
813 
814 __attribute_cold__
connection_read_cq_err(connection * con)815 static int connection_read_cq_err(connection *con) {
816     request_st * const r = &con->request;
817   #if defined(__WIN32)
818     int lastError = WSAGetLastError();
819     switch (lastError) {
820     case EAGAIN:
821         return 0;
822     case EINTR:
823         /* we have been interrupted before we could read */
824         con->is_readable = 1;
825         return 0;
826     case ECONNRESET:
827         /* suppress logging for this error, expected for keep-alive */
828         break;
829     default:
830         log_error(r->conf.errh, __FILE__, __LINE__,
831           "connection closed - recv failed: %d", lastError);
832         break;
833     }
834   #else /* __WIN32 */
835     switch (errno) {
836     case EAGAIN:
837         return 0;
838     case EINTR:
839         /* we have been interrupted before we could read */
840         con->is_readable = 1;
841         return 0;
842     case ECONNRESET:
843         /* suppress logging for this error, expected for keep-alive */
844         break;
845     default:
846         log_perror(r->conf.errh, __FILE__, __LINE__,
847           "connection closed - read failed");
848         break;
849     }
850   #endif /* __WIN32 */
851 
852     connection_set_state_error(r, CON_STATE_ERROR);
853     return -1;
854 }
855 
856 
857 /* 0: everything ok, -1: error, -2: con closed */
connection_read_cq(connection * con,chunkqueue * cq,off_t max_bytes)858 static int connection_read_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
859     ssize_t len;
860     size_t mem_len = 0;
861 
862     do {
863         /* obtain chunk memory into which to read
864          * fill previous chunk if it has a reasonable amount of space available
865          * (use mem_len=0 to obtain large buffer at least half of chunk_buf_sz)
866          */
867         chunk *ckpt = cq->last;
868         char * const mem = chunkqueue_get_memory(cq, &mem_len);
869         if (mem_len > (size_t)max_bytes) mem_len = (size_t)max_bytes;
870 
871       #if defined(__WIN32)
872         len = recv(con->fd, mem, mem_len, 0);
873       #else
874         len = read(con->fd, mem, mem_len);
875       #endif
876 
877         chunkqueue_use_memory(cq, ckpt, len > 0 ? len : 0);
878 
879         if (len != (ssize_t)mem_len) {
880             /* we got less than expected, wait for the next fd-event */
881             con->is_readable = 0;
882             return len > 0 ? 0 : 0 == len ? -2 : connection_read_cq_err(con);
883         }
884 
885         max_bytes -= len;
886 
887         int frd;
888         mem_len = (0 == fdevent_ioctl_fionread(con->fd, S_IFSOCK, &frd))
889           ? (frd < max_bytes) ? (size_t)frd : (size_t)max_bytes
890           : 0;
891     } while (max_bytes);
892     return 0;
893 }
894 
895 
/* Write up to max_bytes from cq to the connection socket by delegating to
 * the server's configured network backend; returns the backend's result. */
static int connection_write_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
    log_error_st * const errh = con->request.conf.errh;
    return con->srv->network_backend_write(con->fd, cq, max_bytes, errh);
}
900 
901 
902 static handler_t connection_handle_read_post_state(request_st * const r);
903 
connection_accepted(server * srv,const server_socket * srv_socket,sock_addr * cnt_addr,int cnt)904 connection *connection_accepted(server *srv, const server_socket *srv_socket, sock_addr *cnt_addr, int cnt) {
905 		connection *con;
906 
907 		srv->cur_fds++;
908 
909 		/* ok, we have the connection, register it */
910 #if 0
911 		log_error(srv->errh, __FILE__, __LINE__, "accepted() %d", cnt);
912 #endif
913 
914 		con = connections_get_new_connection(srv);
915 
916 		con->fd = cnt;
917 		con->fdn = fdevent_register(srv->ev, con->fd, connection_handle_fdevent, con);
918 		con->network_read = connection_read_cq;
919 		con->network_write = connection_write_cq;
920 		con->reqbody_read = connection_handle_read_post_state;
921 
922 		request_st * const r = &con->request;
923 		connection_set_state(r, CON_STATE_REQUEST_START);
924 
925 		con->connection_start = log_monotonic_secs;
926 		con->dst_addr = *cnt_addr;
927 		sock_addr_cache_inet_ntop_copy_buffer(&con->dst_addr_buf,
928 		                                      &con->dst_addr);
929 		con->srv_socket = srv_socket;
930 		con->is_ssl_sock = srv_socket->is_ssl;
931 		con->proto_default_port = 80; /* "http" */
932 
933 		config_cond_cache_reset(r);
934 		r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
935 		                        | (1 << COMP_HTTP_REMOTE_IP);
936 
937 		if (HANDLER_GO_ON != plugins_call_handle_connection_accept(con)) {
938 			connection_reset(con);
939 			connection_close(con);
940 			return NULL;
941 		}
942 		if (r->http_status < 0) connection_set_state(r, CON_STATE_WRITE);
943 		return con;
944 }
945 
946 
947 __attribute_cold__
948 __attribute_noinline__
__attribute_nonnull__()949 __attribute_nonnull__()
950 static void
951 connection_log_state (const request_st * const r, const char * const tag)
952 {
953     buffer * const tb = r->tmp_buf;
954     buffer_clear(tb);
955     http_request_state_append(tb, r->state);
956     log_error(r->conf.errh, __FILE__, __LINE__,
957       "fd:%d id:%d state:%s%s", r->con->fd, r->h2id, tb->ptr, tag);
958 }
959 
960 
961 static void connection_state_machine_h2 (connection * const con);
962 
963 
/* Advance one request through the request state machine.
 *
 * r:   request to step (the connection's own request, or an HTTP/2 stream)
 * con: connection that owns r
 *
 * The switch is re-entered until a pass leaves r->state unchanged, or until
 * one of the cases returns because it must wait for an event or has handed
 * control to the HTTP/2 state machine.  Most cases fall through into the
 * next state on success, so statement order here is significant. */
964 static void
connection_state_machine_loop(request_st * const r,connection * const con)965 connection_state_machine_loop (request_st * const r, connection * const con)
966 {
967 	request_state_t ostate;
968 	do {
969 		if (r->conf.log_state_handling)
970 			connection_log_state(r, "");
971 
		/* remember the state on entry; the loop repeats while it changes */
972 		switch ((ostate = r->state)) {
973 		case CON_STATE_REQUEST_START: /* transient */
974 			/*(should not be reached by HTTP/2 streams)*/
975 			r->start_hp.tv_sec = log_epoch_secs;
976 			con->read_idle_ts = log_monotonic_secs;
977 			if (r->conf.high_precision_timestamps)
978 				log_clock_gettime_realtime(&r->start_hp);
979 
980 			con->request_count++;
981 			r->loops_per_request = 0;
982 
983 			connection_set_state(r, CON_STATE_READ);
984 			__attribute_fallthrough__
985 		case CON_STATE_READ:
986 			/*(should not be reached by HTTP/2 streams)*/
			/* a zero result stops here; if the request was marked
			 * HTTP/2, hand the connection to the HTTP/2 state machine */
987 			if (!connection_handle_read_state(con)) {
988 				if (r->http_version == HTTP_VERSION_2) {
989 					connection_transition_h2(r, con);
990 					connection_state_machine_h2(con);
991 					return;
992 				}
993 				break;
994 			}
995 			/*connection_set_state(r, CON_STATE_REQUEST_END);*/
996 			__attribute_fallthrough__
997 		case CON_STATE_REQUEST_END: /* transient */
			/* no request body: handle request now; else read the body */
998 			connection_set_state(r,
999 			  (0 == r->reqbody_length)
1000 			  ? CON_STATE_HANDLE_REQUEST
1001 			  : CON_STATE_READ_POST);
1002 			__attribute_fallthrough__
1003 		case CON_STATE_READ_POST:
1004 		case CON_STATE_HANDLE_REQUEST:
1005 			switch (http_response_handler(r)) {
1006 			  case HANDLER_GO_ON:/*CON_STATE_RESPONSE_START occurred;transient*/
1007 			  case HANDLER_FINISHED:
1008 				break;
1009 			  case HANDLER_WAIT_FOR_EVENT:
1010 				return;
1011 			  /*case HANDLER_COMEBACK:*//*(not expected)*/
1012 			  /*case HANDLER_ERROR:*/
1013 			  default:
				/* 'continue' jumps to the do-while condition, so the
				 * new error state is processed on the next pass */
1014 				connection_set_state_error(r, CON_STATE_ERROR);
1015 				continue;
1016 			}
1017 			/*__attribute_fallthrough__*/
1018 		/*case CON_STATE_RESPONSE_START:*//*occurred;transient*/
			/* emit response headers using the framing of the protocol */
1019 			if (r->http_version > HTTP_VERSION_1_1)
1020 				h2_send_headers(r, con);
1021 			else
1022 				http_response_write_header(r);
1023 			connection_set_state(r, CON_STATE_WRITE);
1024 			__attribute_fallthrough__
1025 		case CON_STATE_WRITE:
			/* still in the write state: return to the caller */
1026 			if (connection_handle_write_state(r, con) == CON_STATE_WRITE)
1027 				return;
1028 			__attribute_fallthrough__
1029 		case CON_STATE_RESPONSE_END: /* transient */
1030 		case CON_STATE_ERROR:        /* transient */
			/* an HTTP/2 stream (r is not the connection's own request)
			 * is not finished here; return and leave it to the caller */
1031 			if (r->http_version > HTTP_VERSION_1_1 && r != &con->request)
1032 				return;
1033 			connection_handle_response_end_state(r, con);
1034 			/*(make sure ostate will not match r->state)*/
1035 			ostate = CON_STATE_RESPONSE_END;/* != r->state */
1036 			break;
1037 		case CON_STATE_CLOSE:
1038 			/*(should not be reached by HTTP/2 streams)*/
1039 			connection_handle_close_state(con);
1040 			break;
1041 		case CON_STATE_CONNECT:
1042 			break;
1043 		default:/*(should not happen)*/
1044 			/*connection_log_state(r, "");*/ /*(unknown state)*/
1045 			break;
1046 		}
1047 	} while (ostate != (request_state_t)r->state);
1048 }
1049 
1050 
1051 __attribute_cold__
1052 static void
connection_revents_err(request_st * const r,connection * const con)1053 connection_revents_err (request_st * const r, connection * const con)
1054 {
1055     /* defer handling FDEVENT_HUP and FDEVENT_ERR to here in order to
1056      * first attempt (in callers) to read data in kernel socket buffers */
1057     /*assert(con->revents_err & ~(FDEVENT_IN | FDEVENT_OUT));*/
1058     const int revents = (int)con->revents_err;
1059     con->revents_err = 0;
1060 
1061     if (r->state == CON_STATE_CLOSE)
1062         con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
1063     else if (revents & FDEVENT_HUP)
1064         connection_set_state_error(r, CON_STATE_ERROR);
1065     else if (revents & FDEVENT_RDHUP) {
1066         int events = fdevent_fdnode_interest(con->fdn);
1067         events &= ~(FDEVENT_IN|FDEVENT_RDHUP);
1068         r->conf.stream_request_body &=
1069           ~(FDEVENT_STREAM_REQUEST_BUFMIN|FDEVENT_STREAM_REQUEST_POLLIN);
1070         r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLRDHUP;
1071         con->is_readable = 1; /*(can read 0 for end-of-stream)*/
1072         if (chunkqueue_is_empty(con->read_queue)) r->keep_alive = 0;
1073         if (r->reqbody_length < -1)/*(transparent proxy mode; no more rd data)*/
1074             r->reqbody_length = r->reqbody_queue.bytes_in;
1075         if (sock_addr_get_family(&con->dst_addr) == AF_UNIX) {
1076             /* future: will getpeername() on AF_UNIX check if still connected?*/
1077             fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1078         }
1079         else if (fdevent_is_tcp_half_closed(con->fd)) {
1080             /* Success of fdevent_is_tcp_half_closed() after FDEVENT_RDHUP
1081              * indicates TCP FIN received, but does not distinguish between
1082              * client shutdown(fd, SHUT_WR) and client close(fd).  Remove
1083              * FDEVENT_RDHUP so that we do not spin on ready event.  However,
1084              * a later TCP RST will not be detected until next write to socket.
1085              * future: might getpeername() to check for TCP RST on half-closed
1086              * sockets (without FDEVENT_RDHUP interest) when checking for write
1087              * timeouts once a second in server.c, though getpeername() on
1088              * Windows might not indicate this */
1089             r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_TCP_FIN;
1090             fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1091         }
1092         else {
1093             /* Failure of fdevent_is_tcp_half_closed() indicates TCP RST
1094              * (or unable to tell (unsupported OS), though should not
1095              * be setting FDEVENT_RDHUP in that case) */
1096             connection_set_state_error(r, CON_STATE_ERROR);
1097         }
1098     }
1099     else if (revents & FDEVENT_ERR)  /* error, connection reset */
1100         connection_set_state_error(r, CON_STATE_ERROR);
1101     else
1102         log_error(r->conf.errh, __FILE__, __LINE__,
1103           "connection closed: poll() -> ??? %d", revents);
1104 }
1105 
1106 
1107 static void
connection_set_fdevent_interest(request_st * const r,connection * const con)1108 connection_set_fdevent_interest (request_st * const r, connection * const con)
1109 {
1110     if (con->fd < 0) return;
1111 
1112     if (con->revents_err && r->state != CON_STATE_ERROR) {
1113         connection_revents_err(r, con); /* resets con->revents_err = 0 */
1114         connection_state_machine(con);
1115         return;
1116         /* connection_state_machine() will end up calling back into
1117          * connection_set_fdevent_interest(), but with 0 == con->revents_err */
1118     }
1119 
1120     int n = 0;
1121     switch(r->state) {
1122       case CON_STATE_READ:
1123         n = FDEVENT_IN;
1124         if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1125             n |= FDEVENT_RDHUP;
1126         break;
1127       case CON_STATE_WRITE:
1128         if (!chunkqueue_is_empty(con->write_queue)
1129             && 0 == con->is_writable && 0 == con->traffic_limit_reached)
1130             n |= FDEVENT_OUT;
1131         __attribute_fallthrough__
1132       case CON_STATE_READ_POST:
1133         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLIN)
1134             n |= FDEVENT_IN;
1135         if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1136             n |= FDEVENT_RDHUP;
1137         break;
1138       case CON_STATE_CLOSE:
1139         n = FDEVENT_IN;
1140         break;
1141       case CON_STATE_CONNECT:
1142         return;
1143       default:
1144         break;
1145     }
1146 
1147     const int events = fdevent_fdnode_interest(con->fdn);
1148     if (con->is_readable < 0) {
1149         con->is_readable = 0;
1150         n |= FDEVENT_IN;
1151     }
1152     if (con->is_writable < 0) {
1153         con->is_writable = 0;
1154         n |= FDEVENT_OUT;
1155     }
1156     if (events & FDEVENT_RDHUP)
1157         n |= FDEVENT_RDHUP;
1158 
1159     if (n == events) return;
1160 
1161     /* update timestamps when enabling interest in events */
1162     if ((n & FDEVENT_IN) && !(events & FDEVENT_IN))
1163         con->read_idle_ts = log_monotonic_secs;
1164     if ((n & FDEVENT_OUT) && !(events & FDEVENT_OUT))
1165         con->write_request_ts = log_monotonic_secs;
1166     fdevent_fdnode_event_set(con->srv->ev, con->fdn, n);
1167 }
1168 
1169 
1170 __attribute_cold__
1171 static void
connection_request_end_h2(request_st * const h2r,connection * const con)1172 connection_request_end_h2 (request_st * const h2r, connection * const con)
1173 {
1174     if (h2r->keep_alive >= 0) {
1175         h2r->keep_alive = -1;
1176         h2_send_goaway(con, H2_E_NO_ERROR);
1177         http_response_delay(con);
1178     }
1179     else /*(abort connection upon second request to close h2 connection)*/
1180         h2_send_goaway(con, H2_E_ENHANCE_YOUR_CALM);
1181 }
1182 
1183 
/* Run the state machine for an HTTP/2 connection.
 *
 * Steps, in order:
 *  1. parse frames already queued and, if the socket is readable, read and
 *     parse more (skipped once a GOAWAY with an error has been recorded)
 *  2. step every active stream request and stage response DATA frames on
 *     con->write_queue within an approximate byte budget
 *  3. if a GOAWAY with an error was sent, retire all remaining streams
 *  4. write con->write_queue to the socket and decide whether the connection
 *     needs to be rescheduled or is finished
 *  5. update the poll interest for the socket
 *
 * h2r (con->request) represents the connection itself; the entries of
 * h2c->r[] are the per-stream requests. */
1184 static void
connection_state_machine_h2(connection * const con)1185 connection_state_machine_h2 (connection * const con)
1186 {
1187     h2con * const h2c = con->h2;
1188     request_st * const h2r = &con->request;
1189 
    /* network_read() is attempted only if the read queue is empty or
     * h2_parse_frames() returned nonzero, and the socket is readable */
1190     if (h2c->sent_goaway <= 0
1191         && (chunkqueue_is_empty(con->read_queue) || h2_parse_frames(con))
1192         && con->is_readable > 0) {
1193         chunkqueue * const cq = con->read_queue;
1194         const off_t mark = cq->bytes_in;
1195         if (0 == con->network_read(con, cq, MAX_READ_LIMIT)) {
            /* parse again only if the read added bytes to the queue */
1196             if (mark < cq->bytes_in)
1197                 h2_parse_frames(con);
1198         }
1199         else {
1200             /* network error; do not send GOAWAY, but pretend that we did */
1201             h2c->sent_goaway = H2_E_CONNECT_ERROR; /*any error (not NO_ERROR)*/
1202             connection_set_state_error(h2r, CON_STATE_ERROR);
1203         }
1204     }
1205 
1206     /* process requests on HTTP/2 streams */
1207     int resched = 0;
1208     if (h2c->sent_goaway <= 0 && h2c->rused) {
1209         /* coarse check for write throttling
1210          * (connection.kbytes-per-second, server.kbytes-per-second)
1211          * obtain an approximate limit, not refreshed per request_st,
1212          * even though we are not calculating response HEADERS frames
1213          * or frame overhead here */
1214         off_t max_bytes = con->is_writable > 0
1215           ? connection_write_throttle(con, MAX_WRITE_LIMIT)
1216           : 0;
1217         const off_t cqlen = chunkqueue_length(con->write_queue);
        /* budget = throttle limit (capped at 64k when more than 8k is
         * already queued) minus what is already queued, floored at 0 */
1218         if (cqlen > 8192 && max_bytes > 65536) max_bytes = 65536;
1219         max_bytes -= cqlen;
1220         if (max_bytes < 0) max_bytes = 0;
1221 
1222         /* XXX: to avoid buffer bloat due to staging too much data in
1223          * con->write_queue, consider setting limit on how much is staged
1224          * for sending on con->write_queue: adjusting max_bytes down */
1225 
1226         for (uint32_t i = 0; i < h2c->rused; ++i) {
1227             request_st * const r = h2c->r[i];
1228             /* future: might track read/write interest per request
1229              * to avoid iterating through all active requests */
1230 
1231             connection_state_machine_loop(r, con);
1232 
1233           #if 0
1234             if (r->conf.log_state_handling)
1235                 connection_log_state(r, " at loop exit");
1236           #endif
1237 
1238             if (r->state < CON_STATE_WRITE)
1239                 continue;
1240             /* else CON_STATE_WRITE, CON_STATE_RESPONSE_END, CON_STATE_ERROR */
1241             else if (r->state == CON_STATE_WRITE) {
                /* stage body data only if there is some, budget remains,
                 * and the body is complete or response streaming is on */
1242                 if (__builtin_expect((!chunkqueue_is_empty(&r->write_queue)), 1)
1243                     && max_bytes
1244                     && (r->resp_body_finished
1245                         || (r->conf.stream_response_body
1246                             & (FDEVENT_STREAM_RESPONSE
1247                               |FDEVENT_STREAM_RESPONSE_BUFMIN)))) {
1248                     /*(subtract 9 byte HTTP/2 frame overhead from each 16k DATA
1249                      * frame for more efficient sending of large files)*/
1250                     /*(use smaller max per stream if marked 'incremental' (w/ 0)
1251                      * to give more streams a chance to send in parallel)*/
1252                     uint32_t dlen = (r->h2_prio & 1) ? 32768-18 : 8192;
1253                     if (dlen > (uint32_t)max_bytes) dlen = (uint32_t)max_bytes;
1254                     dlen = h2_send_cqdata(r, con, &r->write_queue, dlen);
1255                     if (dlen) { /*(do not resched (spin) if swin empty window)*/
1256                         max_bytes -= (off_t)dlen;
1257                         if (!chunkqueue_is_empty(&r->write_queue))
1258                             resched |= 1;
1259                     }
1260                 }
                /* stream stays in the write state until its queue is
                 * drained and the response body is finished */
1261                 if (!chunkqueue_is_empty(&r->write_queue)
1262                     || !r->resp_body_finished)
1263                     continue;
1264 
1265                 connection_set_state(r, CON_STATE_RESPONSE_END);
1266                 if (__builtin_expect( (r->conf.log_state_handling), 0))
1267                     connection_log_state(r, "");
1268             }
1269 
1270             {/*(r->state==CON_STATE_RESPONSE_END || r->state==CON_STATE_ERROR)*/
1271                 /*(trigger reschedule of con if frames pending)*/
1272                 if (h2c->rused == sizeof(h2c->r)/sizeof(*h2c->r)
1273                     && !chunkqueue_is_empty(con->read_queue))
1274                     resched |= 2;
1275                 h2_send_end_stream(r, con);
                /* save keep_alive before r is invalidated by retiring */
1276                 const int alive = r->keep_alive;
1277                 h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
                /* i is unsigned: --i at 0 wraps and the loop's ++i
                 * brings it back to 0 */
1278                 --i;/* adjust loop i; h2c->rused was modified to retire r */
1279                 /*(special-case: allow *stream* to set r->keep_alive = -1 to
1280                  * trigger goaway on h2 connection, e.g. after mod_auth failure
1281                  * in attempt to mitigate brute force attacks by forcing a
1282                  * reconnect and (somewhat) slowing down retries)*/
1283                 if (alive < 0)
1284                     connection_request_end_h2(h2r, con);
1285             }
1286         }
1287 
        /* budget exhausted: more data may be waiting to be staged */
1288         if (0 == max_bytes) resched |= 1;
1289     }
1290 
1291     if (h2c->sent_goaway > 0 && h2c->rused) {
1292         /* retire streams if an error has occurred
1293          * note: this is not done to other streams in the loop above
1294          * (besides the current stream in the loop) due to the specific
1295          * implementation above, where doing so would mess up the iterator */
1296         for (uint32_t i = 0; i < h2c->rused; ++i) {
1297             request_st * const r = h2c->r[i];
1298             /*assert(r->h2state == H2_STATE_CLOSED);*/
1299             h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
1300             --i;/* adjust loop i; h2c->rused was modified to retire r */
1301         }
1302         /* XXX: ? should we discard con->write_queue
1303          *        and change h2r->state to CON_STATE_RESPONSE_END ? */
1304     }
1305 
1306     if (h2r->state == CON_STATE_WRITE) {
1307         /* write HTTP/2 frames to socket */
1308         if (!chunkqueue_is_empty(con->write_queue))
1309             connection_handle_write(h2r, con);
1310 
        /* connection is done once everything is flushed, no streams
         * remain, and a GOAWAY has been sent (or an error recorded) */
1311         if (chunkqueue_is_empty(con->write_queue)
1312             && 0 == h2c->rused && h2c->sent_goaway)
1313             connection_set_state(h2r, CON_STATE_RESPONSE_END);
1314     }
1315 
1316     if (h2r->state == CON_STATE_WRITE) {
1317         /* (resched & 1) more data is available to write, if still able to write
1318          * (resched & 2) resched to read deferred frames from con->read_queue */
1319         /*(con->is_writable set to 0 if !chunkqueue_is_empty(con->write_queue)
1320          * after trying to write in connection_handle_write() above)*/
1321         if (((resched & 1) && con->is_writable>0 && !con->traffic_limit_reached)
1322             || (resched & 2))
1323             joblist_append(con);
1324 
        /* POLLIN flag is read by connection_set_fdevent_interest() below */
1325         if (h2_want_read(con))
1326             h2r->conf.stream_request_body |=  FDEVENT_STREAM_REQUEST_POLLIN;
1327         else
1328             h2r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
1329     }
1330     else /* e.g. CON_STATE_RESPONSE_END or CON_STATE_ERROR */
1331         connection_state_machine_loop(h2r, con);
1332 
1333     connection_set_fdevent_interest(h2r, con);
1334 }
1335 
1336 
1337 static void
connection_state_machine_h1(connection * const con)1338 connection_state_machine_h1 (connection * const con)
1339 {
1340 	request_st * const r = &con->request;
1341 	connection_state_machine_loop(r, con);
1342 
1343 	if (r->conf.log_state_handling)
1344 		connection_log_state(r, " at loop exit");
1345 
1346 	connection_set_fdevent_interest(r, con);
1347 }
1348 
1349 
1350 void
connection_state_machine(connection * const con)1351 connection_state_machine (connection * const con)
1352 {
1353     if (con->h2)
1354         connection_state_machine_h2(con);
1355     else /* if (r->http_version <= HTTP_VERSION_1_1) */
1356         connection_state_machine_h1(con);
1357 }
1358 
1359 
/* Check one connection for timeouts and lifted traffic limits.
 *
 * con:    connection to check
 * cur_ts: current timestamp; compared against the connection's timestamps,
 *         which elsewhere in this file are set from log_monotonic_secs
 *
 * Depending on the state this checks the linger timeout (CON_STATE_CLOSE),
 * per-stream read/write idle timeouts and the keep-alive timeout (HTTP/2
 * connection in CON_STATE_WRITE), or the read-idle / keep-alive timeout
 * (connection waiting for FDEVENT_IN).  It then checks the write-idle
 * timeout for HTTP/1.x, re-enables a connection whose traffic limit no
 * longer applies, and resets the per-second byte counter.  If anything
 * changed, the connection state machine is run. */
connection_check_timeout(connection * const con,const unix_time64_t cur_ts)1360 static void connection_check_timeout (connection * const con, const unix_time64_t cur_ts) {
1361     const int waitevents = fdevent_fdnode_interest(con->fdn);
1362     int changed = 0;
1363     int t_diff;
1364 
1365     request_st * const r = &con->request;
1366     if (r->state == CON_STATE_CLOSE) {
        /* lingering close has exceeded HTTP_LINGER_TIMEOUT seconds */
1367         if (cur_ts - con->close_timeout_ts > HTTP_LINGER_TIMEOUT) {
1368             changed = 1;
1369         }
1370     }
1371     else if (con->h2 && r->state == CON_STATE_WRITE) {
1372         h2con * const h2c = con->h2;
1373         if (h2c->rused) {
            /* rr: per-stream request; r: the connection's own request.
             * A timeout on any stream puts r into CON_STATE_ERROR. */
1374             for (uint32_t i = 0; i < h2c->rused; ++i) {
1375                 request_st * const rr = h2c->r[i];
1376                 if (rr->state == CON_STATE_ERROR) { /*(should not happen)*/
1377                     changed = 1;
1378                     continue;
1379                 }
                /* request body not yet fully received on this stream */
1380                 if (rr->reqbody_length != rr->reqbody_queue.bytes_in) {
1381                     /* XXX: should timeout apply if not trying to read on h2con?
1382                      * (still applying timeout to catch stuck connections) */
1383                     /* XXX: con->read_idle_ts is not per-request, so timeout
1384                      * will not occur if other read activity occurs on h2con
1385                      * (future: might keep separate timestamp per-request) */
1386                     if (cur_ts - con->read_idle_ts > rr->conf.max_read_idle) {
1387                         /* time - out */
1388                         if (rr->conf.log_request_handling) {
1389                             log_error(rr->conf.errh, __FILE__, __LINE__,
1390                               "request aborted - read timeout: %d", con->fd);
1391                         }
1392                         connection_set_state_error(r, CON_STATE_ERROR);
1393                         changed = 1;
1394                     }
1395                 }
1396 
1397                 if (rr->state != CON_STATE_READ_POST
1398                     && con->write_request_ts != 0) {
1399                     /* XXX: con->write_request_ts is not per-request, so timeout
1400                      * will not occur if other write activity occurs on h2con
1401                      * (future: might keep separate timestamp per-request) */
                    /* NOTE(review): the limit and the logged target and
                     * byte count below come from r (the connection's
                     * request), not from the stream rr -- confirm that
                     * this is intended */
1402                     if (cur_ts - con->write_request_ts
1403                         > r->conf.max_write_idle) {
1404                         /*(see comment further down about max_write_idle)*/
1405                         /* time - out */
1406                         if (r->conf.log_timeouts) {
1407                             log_error(r->conf.errh, __FILE__, __LINE__,
1408                               "NOTE: a request from %s for %.*s timed out "
1409                               "after writing %lld bytes. We waited %d seconds. "
1410                               "If this is a problem, increase "
1411                               "server.max-write-idle",
1412                               r->dst_addr_buf->ptr,
1413                               BUFFER_INTLEN_PTR(&r->target),
1414                               (long long)r->write_queue.bytes_out,
1415                               (int)r->conf.max_write_idle);
1416                         }
1417                         connection_set_state_error(r, CON_STATE_ERROR);
1418                         changed = 1;
1419                     }
1420                 }
1421             }
1422         }
1423         else {
            /* no active streams: treat the connection as idle keep-alive;
             * note this ends with RESPONSE_END rather than ERROR */
1424             if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
1425                 /* time - out */
1426                 if (r->conf.log_request_handling) {
1427                     log_error(r->conf.errh, __FILE__, __LINE__,
1428                               "connection closed - keep-alive timeout: %d",
1429                               con->fd);
1430                 }
1431                 connection_set_state(r, CON_STATE_RESPONSE_END);
1432                 changed = 1;
1433             }
1434         }
1435         /* process changes before optimistic read of additional HTTP/2 frames */
1436         if (changed)
1437             con->is_readable = 0;
1438     }
1439     else if (waitevents & FDEVENT_IN) {
        /* first request on the connection, or any state other than READ:
         * apply the read-idle limit; otherwise (READ on a later request)
         * apply the keep-alive idle limit */
1440         if (con->request_count == 1 || r->state != CON_STATE_READ) {
1441             /* e.g. CON_STATE_READ_POST || CON_STATE_WRITE */
1442             if (cur_ts - con->read_idle_ts > r->conf.max_read_idle) {
1443                 /* time - out */
1444                 if (r->conf.log_request_handling) {
1445                     log_error(r->conf.errh, __FILE__, __LINE__,
1446                               "connection closed - read timeout: %d", con->fd);
1447                 }
1448 
1449                 connection_set_state_error(r, CON_STATE_ERROR);
1450                 changed = 1;
1451             }
1452         } else {
1453             if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
1454                 /* time - out */
1455                 if (r->conf.log_request_handling) {
1456                     log_error(r->conf.errh, __FILE__, __LINE__,
1457                               "connection closed - keep-alive timeout: %d",
1458                               con->fd);
1459                 }
1460 
1461                 connection_set_state_error(r, CON_STATE_ERROR);
1462                 changed = 1;
1463             }
1464         }
1465     }
1466 
1467     /* max_write_idle timeout currently functions as backend timeout,
1468      * too, after response has been started.
1469      * Although backend timeouts now exist, there is no default for timeouts
1470      * to backends, so were this client timeout now to be changed to check
1471      * for write interest to the client, then timeout would not occur if the
1472      * backend hung and there was no backend read timeout set.  Therefore,
1473      * max_write_idle timeout remains timeout for both reading from backend
1474      * and writing to client, though this check here is only for HTTP/1.1.
1475      * In the future, if there were a quick way to detect that a backend
1476      * read timeout was in effect, then this timeout could check for write
1477      * interest to client.  (not a priority) */
1478     /*if (waitevents & FDEVENT_OUT)*/
1479     if (r->http_version <= HTTP_VERSION_1_1
1480         && r->state == CON_STATE_WRITE && con->write_request_ts != 0) {
1481       #if 0
1482         if (cur_ts - con->write_request_ts > 60) {
1483             log_error(r->conf.errh, __FILE__, __LINE__,
1484                       "connection closed - pre-write-request-timeout: %d %d",
1485                       con->fd, cur_ts - con->write_request_ts);
1486         }
1487       #endif
1488 
1489         if (cur_ts - con->write_request_ts > r->conf.max_write_idle) {
1490             /* time - out */
1491             if (r->conf.log_timeouts) {
1492                 log_error(r->conf.errh, __FILE__, __LINE__,
1493                   "NOTE: a request from %s for %.*s timed out after writing "
1494                   "%lld bytes. We waited %d seconds. If this is a problem, "
1495                   "increase server.max-write-idle",
1496                   r->dst_addr_buf->ptr,
1497                   BUFFER_INTLEN_PTR(&r->target),
1498                   (long long)con->write_queue->bytes_out,
1499                   (int)r->conf.max_write_idle);
1500             }
1501             connection_set_state_error(r, CON_STATE_ERROR);
1502             changed = 1;
1503         }
1504     }
1505 
1506     /* lighttpd HTTP/2 limitation: rate limit config r->conf.bytes_per_second
1507      * (currently) taken only from top-level config (socket), with host if SNI
1508      * used, but not any other config conditions, e.g. not per-file-type */
1509 
    /* connection age in seconds, at least 1, used as the rate multiplier */
1510     if (0 == (t_diff = cur_ts - con->connection_start)) t_diff = 1;
1511 
    /* lift the traffic limit if rate limiting is off, or if the bytes
     * written so far are below bytes_per_second * connection age */
1512     if (con->traffic_limit_reached &&
1513         (r->conf.bytes_per_second == 0
1514          || con->write_queue->bytes_out
1515               < (off_t)r->conf.bytes_per_second * t_diff)) {
1516         /* enable connection again */
1517         con->traffic_limit_reached = 0;
1518 
1519         changed = 1;
1520     }
1521 
1522     con->bytes_written_cur_second = 0;
1523 
1524     if (changed) {
1525         connection_state_machine(con);
1526     }
1527 }
1528 
/* Periodic maintenance: check every connection on the server's active list
 * (srv->conns) for timeouts, using cur_ts as the current time. */
void connection_periodic_maint (server * const srv, const unix_time64_t cur_ts) {
    connection *con = srv->conns;
    while (con) {
        /* fetch the successor first; presumably the check can end up
         * unlinking con from the list -- verify against callees */
        connection * const next = con->next;
        connection_check_timeout(con, cur_ts);
        con = next;
    }
}
1536 
/* Graceful-shutdown maintenance pass over every connection in srv->conns.
 *
 * Per connection:
 *  - CON_STATE_CLOSE: shorten the remaining linger period
 *  - HTTP/2 connection in CON_STATE_WRITE: send GOAWAY and finish the
 *    connection once no requests are in use and the write queue is empty
 *  - CON_STATE_READ with a prior request and nothing buffered: treat the idle
 *    keep-alive connection as finished
 *  - if the graceful-shutdown deadline has passed: force CON_STATE_ERROR
 *  - always: disable keep-alive and rate limiting
 * The state machine is run only for connections on which something changed. */
void connection_graceful_shutdown_maint (server *srv) {
    /* non-zero once a deadline is set and already lies in the past */
    const int graceful_expire =
      (srv->graceful_expire_ts && srv->graceful_expire_ts < log_monotonic_secs);

    connection *con = srv->conns;
    while (NULL != con) {
        /* successor saved up front; walk resumes from it after this pass */
        connection * const following = con->next;
        request_st * const r = &con->request;
        int changed = 0;

        switch (r->state) {
          case CON_STATE_CLOSE:
            /* reduce remaining linger timeout to be
             * (from zero) *up to* one more second, but no more */
            if (HTTP_LINGER_TIMEOUT > 1)
                con->close_timeout_ts -= (HTTP_LINGER_TIMEOUT - 1);
            if (log_monotonic_secs - con->close_timeout_ts
                > HTTP_LINGER_TIMEOUT)
                changed = 1;
            break;
          case CON_STATE_WRITE:
            if (!con->h2) break; /*(only HTTP/2 connections handled here)*/
            h2_send_goaway(con, H2_E_NO_ERROR);
            if (0 == con->h2->rused
                && chunkqueue_is_empty(con->write_queue)) {
                connection_set_state(r, CON_STATE_RESPONSE_END);
                changed = 1;
            }
            break;
          case CON_STATE_READ:
            /* close connections in keep-alive waiting for next request */
            if (con->request_count > 1
                && chunkqueue_is_empty(con->read_queue)) {
                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
            break;
          default:
            break;
        }

        if (graceful_expire) {
            /* deadline passed: stop waiting for this connection */
            connection_set_state_error(r, CON_STATE_ERROR);
            changed = 1;
        }

        /* no further requests on this connection */
        r->keep_alive = 0;

        /* lift rate limits (per-connection and global) */
        r->conf.bytes_per_second = 0;
        r->conf.global_bytes_per_second = 0;
        if (con->traffic_limit_reached) {
            con->traffic_limit_reached = 0;
            changed = 1;
        }

        if (changed)
            connection_state_machine(con);

        con = following;
    }
}
1586 
1587 
1588 static int
connection_handle_read_post_cq_compact(chunkqueue * const cq)1589 connection_handle_read_post_cq_compact (chunkqueue * const cq)
1590 {
1591     /* combine first mem chunk with next non-empty mem chunk
1592      * (loop if next chunk is empty) */
1593     chunk *c = cq->first;
1594     if (NULL == c) return 0;
1595     const uint32_t mlen = buffer_clen(c->mem) - (size_t)c->offset;
1596     while ((c = c->next)) {
1597         const uint32_t blen = buffer_clen(c->mem) - (size_t)c->offset;
1598         if (0 == blen) continue;
1599         chunkqueue_compact_mem(cq, mlen + blen);
1600         return 1;
1601     }
1602     return 0;
1603 }
1604 
1605 
1606 __attribute_pure__
1607 static int
connection_handle_read_post_chunked_crlf(chunkqueue * const cq)1608 connection_handle_read_post_chunked_crlf (chunkqueue * const cq)
1609 {
1610     /* caller might check chunkqueue_length(cq) >= 2 before calling here
1611      * to limit return value to either 1 for good or -1 for error */
1612     chunk *c;
1613     buffer *b;
1614     char *p;
1615     size_t len;
1616 
1617     /* caller must have called chunkqueue_remove_finished_chunks(cq), so if
1618      * chunkqueue is not empty, it contains chunk with at least one char */
1619     if (chunkqueue_is_empty(cq)) return 0;
1620 
1621     c = cq->first;
1622     b = c->mem;
1623     p = b->ptr+c->offset;
1624     if (p[0] != '\r') return -1; /* error */
1625     if (p[1] == '\n') return 1;
1626     len = buffer_clen(b) - (size_t)c->offset;
1627     if (1 != len) return -1; /* error */
1628 
1629     while (NULL != (c = c->next)) {
1630         b = c->mem;
1631         len = buffer_clen(b) - (size_t)c->offset;
1632         if (0 == len) continue;
1633         p = b->ptr+c->offset;
1634         return (p[0] == '\n') ? 1 : -1; /* error if not '\n' */
1635     }
1636     return 0;
1637 }
1638 
1639 
1640 static handler_t
connection_handle_read_post_chunked(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1641 connection_handle_read_post_chunked (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1642 {
1643     /* r->conf.max_request_size is in kBytes */
1644     const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1645     off_t te_chunked = r->te_chunked;
1646     do {
1647         off_t len = chunkqueue_length(cq);
1648 
1649         while (0 == te_chunked) {
1650             char *p;
1651             chunk *c = cq->first;
1652             if (NULL == c) break;
1653             force_assert(c->type == MEM_CHUNK);
1654             p = strchr(c->mem->ptr+c->offset, '\n');
1655             if (NULL != p) { /* found HTTP chunked header line */
1656                 off_t hsz = p + 1 - (c->mem->ptr+c->offset);
1657                 unsigned char *s = (unsigned char *)c->mem->ptr+c->offset;
1658                 for (unsigned char u;(u=(unsigned char)hex2int(*s))!=0xFF;++s) {
1659                     if (te_chunked > (off_t)(1uLL<<(8*sizeof(off_t)-5))-1-2) {
1660                         log_error(r->conf.errh, __FILE__, __LINE__,
1661                           "chunked data size too large -> 400");
1662                         /* 400 Bad Request */
1663                         return http_response_reqbody_read_error(r, 400);
1664                     }
1665                     te_chunked <<= 4;
1666                     te_chunked |= u;
1667                 }
1668                 if (s == (unsigned char *)c->mem->ptr+c->offset) { /*(no hex)*/
1669                     log_error(r->conf.errh, __FILE__, __LINE__,
1670                       "chunked header invalid chars -> 400");
1671                     /* 400 Bad Request */
1672                     return http_response_reqbody_read_error(r, 400);
1673                 }
1674                 while (*s == ' ' || *s == '\t') ++s;
1675                 if (*s != '\r' && *s != ';') {
1676                     log_error(r->conf.errh, __FILE__, __LINE__,
1677                       "chunked header invalid chars -> 400");
1678                     /* 400 Bad Request */
1679                     return http_response_reqbody_read_error(r, 400);
1680                 }
1681 
1682                 if (hsz >= 1024) {
1683                     /* prevent theoretical integer overflow
1684                      * casting to (size_t) and adding 2 (for "\r\n") */
1685                     log_error(r->conf.errh, __FILE__, __LINE__,
1686                       "chunked header line too long -> 400");
1687                     /* 400 Bad Request */
1688                     return http_response_reqbody_read_error(r, 400);
1689                 }
1690 
1691                 if (0 == te_chunked) {
1692                     /* do not consume final chunked header until
1693                      * (optional) trailers received along with
1694                      * request-ending blank line "\r\n" */
1695                     if (p[0] == '\r' && p[1] == '\n') {
1696                         /*(common case with no trailers; final \r\n received)*/
1697                         hsz += 2;
1698                     }
1699                     else {
1700                         /* trailers or final CRLF crosses into next cq chunk */
1701                         hsz -= 2;
1702                         do {
1703                             c = cq->first;
1704                             p = strstr(c->mem->ptr+c->offset+hsz, "\r\n\r\n");
1705                         } while (NULL == p
1706                                  && connection_handle_read_post_cq_compact(cq));
1707                         if (NULL == p) {
1708                             /*(effectively doubles max request field size
1709                              * potentially received by backend, if in the future
1710                              * these trailers are added to request headers)*/
1711                             if ((off_t)buffer_clen(c->mem) - c->offset
1712                                 < (off_t)r->conf.max_request_field_size) {
1713                                 break;
1714                             }
1715                             else {
1716                                 /* ignore excessively long trailers;
1717                                  * disable keep-alive on connection */
1718                                 r->keep_alive = 0;
1719                                 p = c->mem->ptr + buffer_clen(c->mem)
1720                                   - 4;
1721                             }
1722                         }
1723                         hsz = p + 4 - (c->mem->ptr+c->offset);
1724                         /* trailers currently ignored, but could be processed
1725                          * here if 0 == (r->conf.stream_request_body &
1726                          *               & (FDEVENT_STREAM_REQUEST
1727                          *                 |FDEVENT_STREAM_REQUEST_BUFMIN))
1728                          * taking care to reject fields forbidden in trailers,
1729                          * making trailers available to CGI and other backends*/
1730                     }
1731                     chunkqueue_mark_written(cq, (size_t)hsz);
1732                     r->reqbody_length = dst_cq->bytes_in;
1733                     break; /* done reading HTTP chunked request body */
1734                 }
1735 
1736                 /* consume HTTP chunked header */
1737                 chunkqueue_mark_written(cq, (size_t)hsz);
1738                 len = chunkqueue_length(cq);
1739 
1740                 if (0 !=max_request_size
1741                     && (max_request_size < te_chunked
1742                      || max_request_size - te_chunked < dst_cq->bytes_in)) {
1743                     log_error(r->conf.errh, __FILE__, __LINE__,
1744                       "request-size too long: %lld -> 413",
1745                       (long long)(dst_cq->bytes_in + te_chunked));
1746                     /* 413 Payload Too Large */
1747                     return http_response_reqbody_read_error(r, 413);
1748                 }
1749 
1750                 te_chunked += 2; /*(for trailing "\r\n" after chunked data)*/
1751 
1752                 break; /* read HTTP chunked header */
1753             }
1754 
1755             /*(likely better ways to handle chunked header crossing chunkqueue
1756              * chunks, but this situation is not expected to occur frequently)*/
1757             if ((off_t)buffer_clen(c->mem) - c->offset >= 1024) {
1758                 log_error(r->conf.errh, __FILE__, __LINE__,
1759                   "chunked header line too long -> 400");
1760                 /* 400 Bad Request */
1761                 return http_response_reqbody_read_error(r, 400);
1762             }
1763             else if (!connection_handle_read_post_cq_compact(cq)) {
1764                 break;
1765             }
1766         }
1767         if (0 == te_chunked) break;
1768 
1769         if (te_chunked > 2) {
1770             if (len > te_chunked-2) len = te_chunked-2;
1771             if (dst_cq->bytes_in + te_chunked <= 64*1024) {
1772                 /* avoid buffering request bodies <= 64k on disk */
1773                 chunkqueue_steal(dst_cq, cq, len);
1774             }
1775             else if (0 != chunkqueue_steal_with_tempfiles(dst_cq, cq, len,
1776                                                           r->conf.errh)) {
1777                 /* 500 Internal Server Error */
1778                 return http_response_reqbody_read_error(r, 500);
1779             }
1780             te_chunked -= len;
1781             len = chunkqueue_length(cq);
1782         }
1783 
1784         if (len < te_chunked) break;
1785 
1786         if (2 == te_chunked) {
1787             if (-1 == connection_handle_read_post_chunked_crlf(cq)) {
1788                 log_error(r->conf.errh, __FILE__, __LINE__,
1789                   "chunked data missing end CRLF -> 400");
1790                 /* 400 Bad Request */
1791                 return http_response_reqbody_read_error(r, 400);
1792             }
1793             chunkqueue_mark_written(cq, 2);/*consume \r\n at end of chunk data*/
1794             te_chunked -= 2;
1795         }
1796 
1797     } while (!chunkqueue_is_empty(cq));
1798 
1799     r->te_chunked = te_chunked;
1800     return HANDLER_GO_ON;
1801 }
1802 
1803 
1804 static handler_t
connection_handle_read_body_unknown(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1805 connection_handle_read_body_unknown (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1806 {
1807     /* r->conf.max_request_size is in kBytes */
1808     const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1809     chunkqueue_append_chunkqueue(dst_cq, cq);
1810     if (0 != max_request_size && dst_cq->bytes_in > max_request_size) {
1811         log_error(r->conf.errh, __FILE__, __LINE__,
1812           "request-size too long: %lld -> 413", (long long)dst_cq->bytes_in);
1813         /* 413 Payload Too Large */
1814         return http_response_reqbody_read_error(r, 413);
1815     }
1816     return HANDLER_GO_ON;
1817 }
1818 
1819 
1820 __attribute_cold__
1821 static int
connection_check_expect_100(request_st * const r,connection * const con)1822 connection_check_expect_100 (request_st * const r, connection * const con)
1823 {
1824     if (con->is_writable <= 0)
1825         return 1;
1826 
1827     const buffer * const vb =
1828       http_header_request_get(r, HTTP_HEADER_EXPECT,
1829                               CONST_STR_LEN("Expect"));
1830     if (NULL == vb)
1831         return 1;
1832 
1833     /* (always unset Expect header so that check is not repeated for request */
1834     int rc = buffer_eq_icase_slen(vb, CONST_STR_LEN("100-continue"));
1835     http_header_request_unset(r, HTTP_HEADER_EXPECT,
1836                               CONST_STR_LEN("Expect"));
1837     if (!rc
1838         || 0 != r->reqbody_queue.bytes_in
1839         || !chunkqueue_is_empty(&r->read_queue)
1840         || !chunkqueue_is_empty(&r->write_queue))
1841         return 1;
1842 
1843     /* send 100 Continue only if no request body data received yet
1844      * and response has not yet started (checked above) */
1845     if (r->http_version > HTTP_VERSION_1_1)
1846         h2_send_100_continue(r, con);
1847     else if (r->http_version == HTTP_VERSION_1_1)
1848         return connection_write_100_continue(r, con);
1849 
1850     return 1;
1851 }
1852 
1853 
1854 static handler_t
connection_handle_read_post_state(request_st * const r)1855 connection_handle_read_post_state (request_st * const r)
1856 {
1857     connection * const con = r->con;
1858     chunkqueue * const cq = &r->read_queue;
1859     chunkqueue * const dst_cq = &r->reqbody_queue;
1860 
1861     int is_closed = 0;
1862 
1863     if (r->http_version > HTTP_VERSION_1_1) {
1864         /*(H2_STATE_HALF_CLOSED_REMOTE or H2_STATE_CLOSED)*/
1865         if (r->h2state >= H2_STATE_HALF_CLOSED_REMOTE)
1866             is_closed = 1;
1867     }
1868     else if (con->is_readable > 0) {
1869         con->read_idle_ts = log_monotonic_secs;
1870         const off_t max_per_read =
1871           !(r->conf.stream_request_body /*(if not streaming request body)*/
1872             & (FDEVENT_STREAM_REQUEST|FDEVENT_STREAM_REQUEST_BUFMIN))
1873             ? MAX_READ_LIMIT
1874             : (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_BUFMIN)
1875               ? 16384  /* FDEVENT_STREAM_REQUEST_BUFMIN */
1876               : 65536; /* FDEVENT_STREAM_REQUEST */
1877         switch(con->network_read(con, cq, max_per_read)) {
1878         case -1:
1879             connection_set_state_error(r, CON_STATE_ERROR);
1880             return HANDLER_ERROR;
1881         case -2:
1882             is_closed = 1;
1883             break;
1884         default:
1885             break;
1886         }
1887 
1888         chunkqueue_remove_finished_chunks(cq);
1889     }
1890 
1891     /* Check for Expect: 100-continue in request headers */
1892     if (light_btst(r->rqst_htags, HTTP_HEADER_EXPECT)
1893         && !connection_check_expect_100(r, con))
1894         return HANDLER_ERROR;
1895 
1896     if (r->http_version > HTTP_VERSION_1_1) {
1897         /* h2_recv_data() places frame payload directly into r->reqbody_queue */
1898     }
1899     else if (r->reqbody_length < 0) {
1900         /*(-1: Transfer-Encoding: chunked, -2: unspecified length)*/
1901         handler_t rc = (-1 == r->reqbody_length)
1902                      ? connection_handle_read_post_chunked(r, cq, dst_cq)
1903                      : connection_handle_read_body_unknown(r, cq, dst_cq);
1904         if (HANDLER_GO_ON != rc) return rc;
1905         chunkqueue_remove_finished_chunks(cq);
1906     }
1907     else {
1908         off_t len = (off_t)r->reqbody_length - dst_cq->bytes_in;
1909         if (r->reqbody_length <= 64*1024) {
1910             /* don't buffer request bodies <= 64k on disk */
1911             chunkqueue_steal(dst_cq, cq, len);
1912         }
1913         else if (chunkqueue_length(dst_cq) + len <= 64*1024
1914                  && (!dst_cq->first || dst_cq->first->type == MEM_CHUNK)) {
1915             /* avoid tempfiles when streaming request body to fast backend */
1916             chunkqueue_steal(dst_cq, cq, len);
1917         }
1918         else if (0 !=
1919                  chunkqueue_steal_with_tempfiles(dst_cq,cq,len,r->conf.errh)) {
1920             /* writing to temp file failed */ /* Internal Server Error */
1921             return http_response_reqbody_read_error(r, 500);
1922         }
1923         chunkqueue_remove_finished_chunks(cq);
1924     }
1925 
1926     if (dst_cq->bytes_in == (off_t)r->reqbody_length) {
1927         /* Content is ready */
1928         r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
1929         if (r->state == CON_STATE_READ_POST) {
1930             connection_set_state(r, CON_STATE_HANDLE_REQUEST);
1931         }
1932         return HANDLER_GO_ON;
1933     }
1934     else if (is_closed) {
1935       #if 0
1936         return http_response_reqbody_read_error(r, 400); /* Bad Request */
1937       #endif
1938         return HANDLER_ERROR;
1939     }
1940     else {
1941         r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
1942         return (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
1943           ? HANDLER_GO_ON
1944           : HANDLER_WAIT_FOR_EVENT;
1945     }
1946 }
1947