1 #include "first.h"
2
3 #include "base.h"
4 #include "buffer.h"
5 #include "burl.h" /* HTTP_PARSEOPT_HEADER_STRICT */
6 #include "chunk.h"
7 #include "log.h"
8 #include "connections.h"
9 #include "fdevent.h"
10 #include "h2.h"
11 #include "http_header.h"
12
13 #include "reqpool.h"
14 #include "request.h"
15 #include "response.h"
16 #include "network.h"
17 #include "stat_cache.h"
18
19 #include "plugin.h"
20 #include "plugins.h"
21
22 #include "sock_addr_cache.h"
23
24 #include <sys/stat.h>
25
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <errno.h>
29 #include <string.h>
30
31 #include "sys-socket.h"
32
33 #define HTTP_LINGER_TIMEOUT 5
34
35 #define connection_set_state(r, n) ((r)->state = (n))
36
37 __attribute_cold__
connection_set_state_error(request_st * const r,const request_state_t state)38 static void connection_set_state_error(request_st * const r, const request_state_t state) {
39 connection_set_state(r, state);
40 }
41
42 __attribute_cold__
43 static connection *connection_init(server *srv);
44
45 static void connection_reset(connection *con);
46
connections_get_new_connection(server * srv)47 static connection *connections_get_new_connection(server *srv) {
48 connection *con;
49 --srv->lim_conns;
50 if (srv->conns_pool) {
51 con = srv->conns_pool;
52 srv->conns_pool = con->next;
53 }
54 else {
55 con = connection_init(srv);
56 connection_reset(con);
57 }
58 /*con->prev = NULL;*//*(already set)*/
59 if ((con->next = srv->conns))
60 con->next->prev = con;
61 return (srv->conns = con);
62 }
63
/* Unlink con from the doubly-linked list srv->conns and push it onto
 * the free pool srv->conns_pool; srv->lim_conns is incremented. */
static void connection_del(server *srv, connection *con) {
    connection * const after  = con->next;
    connection * const before = con->prev;

    if (after)
        after->prev = before;

    if (before)
        before->next = after;
    else
        srv->conns = after; /* con was the list head */

    con->prev = NULL;
    con->next = srv->conns_pool;
    srv->conns_pool = con;
    ++srv->lim_conns;
}
76
/* Fully close a connection.
 * Calls the plugins' connection-close hook, resets request state and the
 * read queue, removes the fd from the event system, closes the socket,
 * and finally moves con to the free pool via connection_del(). */
static void connection_close(connection *con) {
    /* a negative fd is flipped back to its positive value before use */
    if (con->fd < 0)
        con->fd = -con->fd;

    plugins_call_handle_connection_close(con);

    request_st * const r = &con->request;
    server * const srv = con->srv;
    request_reset_ex(r); /*(r->conf.* is still valid below)*/
    r->state = CON_STATE_CONNECT;

    chunkqueue_reset(con->read_queue);
    con->request_count = 0;
    con->is_ssl_sock = 0;
    con->revents_err = 0;

    fdevent_fdnode_event_del(srv->ev, con->fdn);
    fdevent_unregister(srv->ev, con->fdn);
    con->fdn = NULL;

  #ifdef __WIN32
    const int rc = closesocket(con->fd);
  #else
    const int rc = close(con->fd);
  #endif
    if (0 == rc) {
        --srv->cur_fds;
    }
    else {
        log_perror(r->conf.errh, __FILE__, __LINE__,
          "(warning) close: %d", con->fd);
    }

    if (r->conf.log_state_handling)
        log_error(r->conf.errh, __FILE__, __LINE__,
          "connection closed for fd %d", con->fd);

    con->fd = -1;

    connection_del(srv, con);
}
113
/* Drain and discard pending input on a cleartext socket while lingering.
 *
 * We have to do the linger_on_close stuff regardless of r->keep_alive;
 * even non-keepalive sockets may still have unread data, and closing
 * before reading it will make the client not see all our output.
 *
 * If the read would block, return and wait for more.  On EOF or on any
 * other error, con->close_timeout_ts is moved far enough into the past
 * that the linger timeout check in connection_handle_close_state() fires. */
static void connection_read_for_eos_plain(connection * const con) {
    const int family = sock_addr_get_family(&con->dst_addr);
    char scratch[16384];
    ssize_t rd;

    for (;;) {
        rd = fdevent_socket_read_discard(con->fd, scratch, sizeof(scratch),
                                         family, SOCK_STREAM);
        if (rd > 0) continue;                   /* more data may be pending */
        if (rd < 0 && errno == EINTR) continue; /* interrupted; retry */
        break;
    }

    if (rd < 0) {
        if (errno == EAGAIN) return;
      #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
        if (errno == EWOULDBLOCK) return;
      #endif
    }

    /* 0 == rd || (rd < 0 && (errno is a non-recoverable error)) */
    con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
}
136
/* Lingering read for TLS connections: read through con->network_read()
 * and throw away whatever was read.  A negative return from the read
 * expires the linger timeout immediately. */
static void connection_read_for_eos_ssl(connection * const con) {
    const int rc = con->network_read(con, con->read_queue, MAX_READ_LIMIT);
    if (rc < 0)
        con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);

    /* discard anything that was read */
    chunkqueue_reset(con->read_queue);
}
142
/* Dispatch the lingering read to the TLS or the cleartext variant,
 * depending on con->is_ssl_sock. */
static void connection_read_for_eos(connection * const con) {
    if (con->is_ssl_sock)
        connection_read_for_eos_ssl(con);
    else
        connection_read_for_eos_plain(con);
}
148
/* Handle a connection in the close (lingering) state: drain input, then
 * close the connection once more than HTTP_LINGER_TIMEOUT seconds have
 * passed since con->close_timeout_ts. */
static void connection_handle_close_state(connection *con) {
    connection_read_for_eos(con);

    if (log_monotonic_secs - con->close_timeout_ts <= HTTP_LINGER_TIMEOUT)
        return; /* still lingering */

    connection_close(con);
}
156
/* Begin shutting down a connection.
 * Calls the plugins' shut_wr hook and resets the connection.  If the fd
 * is valid and either this is a TLS socket or shutdown(SHUT_WR) succeeds,
 * the connection enters CON_STATE_CLOSE and the linger timer starts.
 * Otherwise the connection is closed right away. */
static void connection_handle_shutdown(connection *con) {
    plugins_call_handle_connection_shut_wr(con);

    connection_reset(con);

    /* close the connection */
    if (con->fd < 0
        || (!con->is_ssl_sock && 0 != shutdown(con->fd, SHUT_WR))) {
        connection_close(con);
        return;
    }

    con->close_timeout_ts = log_monotonic_secs;

    request_st * const r = &con->request;
    r->state = CON_STATE_CLOSE;
    if (r->conf.log_state_handling)
        log_error(r->conf.errh, __FILE__, __LINE__,
          "shutdown for fd %d", con->fd);
}
177
178
/* Finish a response.
 * Retires an HTTP/2 connection when r->http_version is above HTTP/1.1,
 * runs the request_done hook if a status is set, disables keep-alive when
 * the request body was not fully read or the request is in error state,
 * and then either prepares for the next keep-alive request or begins
 * connection shutdown. */
static void connection_handle_response_end_state(request_st * const r, connection * const con) {
    if (r->http_version > HTTP_VERSION_1_1) {
        h2_retire_con(r, con);
        r->keep_alive = 0;
        /* set a status so that mod_accesslog, mod_rrdtool hooks are called
         * in plugins_call_handle_request_done() (XXX: or set to 0 to omit) */
        r->http_status = 100; /* XXX: what if con->state == CON_STATE_ERROR? */
    }

    /* call request_done hook if http_status set (e.g. to log request) */
    /* (even if error, connection dropped, as long as http_status is set) */
    if (r->http_status)
        plugins_call_handle_request_done(r);

    if (r->state == CON_STATE_ERROR
        || r->reqbody_length != r->reqbody_queue.bytes_in) {
        /* request body may not have been read completely */
        r->keep_alive = 0;
        /* clean up failed partial write of 1xx intermediate responses*/
        if (con->write_queue != &r->write_queue) { /*(for HTTP/1.1)*/
            chunkqueue_free(con->write_queue);
            con->write_queue = &r->write_queue;
        }
    }

    if (r->keep_alive <= 0) {
        connection_handle_shutdown(con);
        return;
    }

    /* keep-alive: get ready for the next request on this connection */
    request_reset(r);
    con->is_readable = 1; /* potentially trigger optimistic read */
    /*(accounting used by mod_accesslog for HTTP/1.0 and HTTP/1.1)*/
    /*(overloaded to detect next bytes recv'd on keep-alive con)*/
    r->bytes_read_ckpt = r->read_queue.bytes_in;
    r->bytes_written_ckpt = r->write_queue.bytes_out;
  #if 0
    r->start_hp.tv_sec = log_epoch_secs;
    con->read_idle_ts = log_monotonic_secs;
  #endif
    r->state = CON_STATE_REQUEST_START;
}
219
220
221 __attribute_pure__
222 static off_t
connection_write_throttled(const connection * const con,off_t max_bytes)223 connection_write_throttled (const connection * const con, off_t max_bytes)
224 {
225 const request_config * const restrict rconf = &con->request.conf;
226 if (0 == rconf->global_bytes_per_second && 0 == rconf->bytes_per_second)
227 return max_bytes;
228
229 if (rconf->global_bytes_per_second) {
230 off_t limit = (off_t)rconf->global_bytes_per_second
231 - *(rconf->global_bytes_per_second_cnt_ptr);
232 if (max_bytes > limit)
233 max_bytes = limit;
234 }
235
236 if (rconf->bytes_per_second) {
237 off_t limit = (off_t)rconf->bytes_per_second
238 - con->bytes_written_cur_second;
239 if (max_bytes > limit)
240 max_bytes = limit;
241 }
242
243 return max_bytes > 0 ? max_bytes : 0; /*(0 == reached traffic limit)*/
244 }
245
246
247 static off_t
connection_write_throttle(connection * const con,off_t max_bytes)248 connection_write_throttle (connection * const con, off_t max_bytes)
249 {
250 /*assert(max_bytes > 0);*/
251 max_bytes = connection_write_throttled(con, max_bytes);
252 if (0 == max_bytes) con->traffic_limit_reached = 1;
253 return max_bytes;
254 }
255
256
257 static int
connection_write_chunkqueue(connection * const con,chunkqueue * const restrict cq,off_t max_bytes)258 connection_write_chunkqueue (connection * const con, chunkqueue * const restrict cq, off_t max_bytes)
259 {
260 /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */
261
262 con->write_request_ts = log_monotonic_secs;
263
264 max_bytes = connection_write_throttle(con, max_bytes);
265 if (0 == max_bytes) return 1;
266
267 off_t written = cq->bytes_out;
268 int ret;
269
270 #ifdef TCP_CORK
271 int corked = 0;
272 #endif
273
274 /* walk chunkqueue up to first FILE_CHUNK (if present)
275 * This may incur memory load misses for pointer chasing, but effectively
276 * preloads part of the chunkqueue, something which used to be a side effect
277 * of a previous (less efficient) version of chunkqueue_length() which
278 * walked the entire chunkqueue (on each and every call). The loads here
279 * make a measurable difference in performance in underlying call to
280 * con->network_write() */
281 if (cq->first->next && cq->first->type == MEM_CHUNK) {
282 const chunk *c = cq->first;
283 do { c = c->next; } while (c && c->type == MEM_CHUNK);
284 #ifdef TCP_CORK
285 /* Linux: put a cork into socket as we want to combine write() calls
286 * but only if we really have multiple chunks including non-MEM_CHUNK
287 * (or if multiple chunks and TLS), and only if TCP socket */
288 /* (max_bytes may have been reduced by connection_write_throttle(),
289 * but not bothering to check; might result in some extra corking) */
290 if (NULL != c || (con->is_ssl_sock && chunkqueue_length(cq) > 16384)) {
291 const int sa_family = sock_addr_get_family(&con->srv_socket->addr);
292 if (sa_family == AF_INET || sa_family == AF_INET6) {
293 corked = 1;
294 (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
295 &corked, sizeof(corked));
296 }
297 }
298 #endif
299 }
300
301 ret = con->network_write(con, cq, max_bytes);
302 if (ret >= 0) {
303 ret = chunkqueue_is_empty(cq) ? 0 : 1;
304 }
305
306 #ifdef TCP_CORK
307 if (corked) {
308 corked = 0;
309 (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
310 &corked, sizeof(corked));
311 }
312 #endif
313
314 written = cq->bytes_out - written;
315 con->bytes_written_cur_second += written;
316 request_st * const r = &con->request;
317 if (r->conf.global_bytes_per_second_cnt_ptr)
318 *(r->conf.global_bytes_per_second_cnt_ptr) += written;
319
320 return ret;
321 }
322
323
324 static int
connection_write_1xx_info(request_st * const r,connection * const con)325 connection_write_1xx_info (request_st * const r, connection * const con)
326 {
327 /* (Note: prior 1xx intermediate responses may be present in cq) */
328 /* (Note: also choosing not to update con->write_request_ts
329 * which differs from connection_write_chunkqueue()) */
330 chunkqueue * const cq = con->write_queue;
331 off_t written = cq->bytes_out;
332
333 int rc = con->network_write(con, cq, MAX_WRITE_LIMIT);
334
335 written = cq->bytes_out - written;
336 con->bytes_written_cur_second += written;
337 if (r->conf.global_bytes_per_second_cnt_ptr)
338 *(r->conf.global_bytes_per_second_cnt_ptr) += written;
339
340 if (rc < 0) {
341 connection_set_state_error(r, CON_STATE_ERROR);
342 return 0; /* error */
343 }
344
345 if (!chunkqueue_is_empty(cq)) { /* partial write (unlikely) */
346 con->is_writable = 0;
347 if (cq == &r->write_queue) {
348 /* save partial write of 1xx in separate chunkqueue
349 * Note: sending of remainder of 1xx might be delayed
350 * until next set of response headers are sent */
351 con->write_queue = chunkqueue_init(NULL);
352 /* (copy bytes for accounting purposes in event of failure) */
353 con->write_queue->bytes_in = cq->bytes_out; /*(yes, bytes_out)*/
354 con->write_queue->bytes_out = cq->bytes_out;
355 chunkqueue_append_chunkqueue(con->write_queue, cq);
356 }
357 }
358
359 #if 0
360 /* XXX: accounting inconsistency
361 * 1xx is not currently included in r->resp_header_len,
362 * so mod_accesslog reporting of %b or %B (FORMAT_BYTES_OUT_NO_HEADER)
363 * reports all bytes out minus len of final response headers,
364 * but including 1xx intermediate responses. If 1xx intermediate
365 * responses were included in r->resp_header_len, then there are a
366 * few places in the code which must be adjusted to use r->resp_header_done
367 * instead of (0 == r->resp_header_len) as flag that final response was set
368 * (Doing the following would "discard" the 1xx len from bytes_out)
369 */
370 r->write_queue.bytes_in = r->write_queue.bytes_out = 0;
371 #endif
372
373 return 1; /* success */
374 }
375
376
377 int
connection_send_1xx(request_st * const r,connection * const con)378 connection_send_1xx (request_st * const r, connection * const con)
379 {
380 /* Make best effort to send HTTP/1.1 1xx intermediate */
381 /* (Note: if other modules set response headers *before* the
382 * handle_response_start hook, and the backends subsequently sends 1xx,
383 * then the response headers are sent here with 1xx and might be cleared
384 * by caller (http_response_parse_headers() and http_response_check_1xx()),
385 * instead of being sent with the final response.
386 * (e.g. mod_magnet setting response headers, then backend sending 103)) */
387
388 chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
389
390 buffer * const b = chunkqueue_append_buffer_open(cq);
391 buffer_copy_string_len(b, CONST_STR_LEN("HTTP/1.1 "));
392 http_status_append(b, r->http_status);
393 for (uint32_t i = 0; i < r->resp_headers.used; ++i) {
394 const data_string * const ds = (data_string *)r->resp_headers.data[i];
395 const uint32_t klen = buffer_clen(&ds->key);
396 const uint32_t vlen = buffer_clen(&ds->value);
397 if (0 == klen || 0 == vlen) continue;
398 buffer_append_str2(b, CONST_STR_LEN("\r\n"), ds->key.ptr, klen);
399 buffer_append_str2(b, CONST_STR_LEN(": "), ds->value.ptr, vlen);
400 }
401 buffer_append_string_len(b, CONST_STR_LEN("\r\n\r\n"));
402 chunkqueue_append_buffer_commit(cq);
403
404 if (con->traffic_limit_reached)
405 return 1; /* success; send later if throttled */
406
407 return connection_write_1xx_info(r, con);
408 }
409
410
411 static int
connection_write_100_continue(request_st * const r,connection * const con)412 connection_write_100_continue (request_st * const r, connection * const con)
413 {
414 /* Make best effort to send "HTTP/1.1 100 Continue" */
415 static const char http_100_continue[] = "HTTP/1.1 100 Continue\r\n\r\n";
416
417 if (con->traffic_limit_reached)
418 return 1; /* success; skip sending if throttled */
419
420 chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
421 chunkqueue_append_mem(cq, http_100_continue, sizeof(http_100_continue)-1);
422 return connection_write_1xx_info(r, con);
423 }
424
425
/* Write pending data in con->write_queue to the client.
 *
 * Returns the resulting request state:
 *   CON_STATE_RESPONSE_END  queue drained and response body finished
 *   CON_STATE_ERROR         write failed or remote closed
 *   CON_STATE_WRITE         otherwise (state did not change) */
static int connection_handle_write(request_st * const r, connection * const con) {
    /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */

    if (con->is_writable <= 0) return CON_STATE_WRITE;

    const int rc =
      connection_write_chunkqueue(con, con->write_queue, MAX_WRITE_LIMIT);

    if (0 == rc) {
        /* everything in the queue was written */
        if (r->resp_body_finished) {
            r->state = CON_STATE_RESPONSE_END;
            return CON_STATE_RESPONSE_END;
        }
    }
    else if (1 == rc) {
        /* do not spin trying to send HTTP/2 server Connection Preface
         * while waiting for TLS negotiation to complete */
        if (con->write_queue->bytes_out)
            con->is_writable = 0;

        /* not finished yet -> WRITE */
    }
    else if (-1 == rc || -2 == rc) {
        if (-1 == rc) { /* error on our side */
            log_error(r->conf.errh, __FILE__, __LINE__,
              "connection closed: write failed on fd %d", con->fd);
        }
        /* -2: remote close */
        connection_set_state_error(r, CON_STATE_ERROR);
        return CON_STATE_ERROR;
    }

    return CON_STATE_WRITE; /*(state did not change)*/
}
457
/* Drive the write state for a request.
 *
 * Alternates between writing queued response data (HTTP/1.x only) and
 * calling the handler module's subrequest hook to produce more, for as
 * long as progress is possible.
 *
 * Returns CON_STATE_RESPONSE_END when the response is complete,
 * CON_STATE_ERROR on failure, else CON_STATE_WRITE. */
static int connection_handle_write_state(request_st * const r, connection * const con) {
    for (;;) {
        /* only try to write if we have something in the queue */
        if (!chunkqueue_is_empty(&r->write_queue)) {
            if (r->http_version <= HTTP_VERSION_1_1) {
                const int state = connection_handle_write(r, con);
                if (state != CON_STATE_WRITE) return state;
            }
        }
        else if (r->resp_body_finished) {
            r->state = CON_STATE_RESPONSE_END;
            return CON_STATE_RESPONSE_END;
        }

        if (r->handler_module && !r->resp_body_finished) {
            const plugin * const p = r->handler_module;
            const int rc = p->handle_subrequest(r, p->data);
            if (rc != HANDLER_WAIT_FOR_EVENT
                && rc != HANDLER_FINISHED
                && rc != HANDLER_GO_ON) {
                /* HANDLER_ERROR fails quietly; anything else
                 * (including HANDLER_COMEBACK) is unexpected and logged */
                if (rc != HANDLER_ERROR)
                    log_error(r->conf.errh, __FILE__, __LINE__,
                      "unexpected subrequest handler ret-value: %d %d",
                      con->fd, rc);
                connection_set_state_error(r, CON_STATE_ERROR);
                return CON_STATE_ERROR;
            }
        }

        /* decide whether another pass could make progress */
        if (r->http_version > HTTP_VERSION_1_1)
            break;
        if (!chunkqueue_is_empty(&r->write_queue)) {
            if (con->is_writable <= 0 || 0 != con->traffic_limit_reached)
                break;
        }
        else if (!r->resp_body_finished)
            break;
    }

    return CON_STATE_WRITE;
}
497
498
499 __attribute_cold__
connection_init(server * srv)500 static connection *connection_init(server *srv) {
501 connection * const con = ck_calloc(1, sizeof(*con));
502
503 con->srv = srv;
504 con->plugin_slots = srv->plugin_slots;
505 con->config_data_base = srv->config_data_base;
506
507 request_st * const r = &con->request;
508 request_init_data(r, con, srv);
509 con->write_queue = &r->write_queue;
510 con->read_queue = &r->read_queue;
511
512 /* init plugin-specific per-connection structures */
513 con->plugin_ctx = ck_calloc(srv->plugins.used + 1, sizeof(void *));
514
515 return con;
516 }
517
518
/* Release a connection object and everything it owns.
 * Read/write queues are freed separately only when they are not the
 * queues embedded in the request (those go with request_free_data()). */
static void connection_free(connection * const con) {
    request_st * const r = &con->request;

    connection_reset(con);

    if (&r->write_queue != con->write_queue)
        chunkqueue_free(con->write_queue);

    if (&r->read_queue != con->read_queue)
        chunkqueue_free(con->read_queue);

    request_free_data(r);

    free(con->plugin_ctx);
    free(con->dst_addr_buf.ptr);
    free(con);
}
533
/* Free every connection object held in the free pool srv->conns_pool. */
void connections_pool_clear(server * const srv) {
    for (connection *con = srv->conns_pool; con; con = srv->conns_pool) {
        srv->conns_pool = con->next;
        connection_free(con);
    }
}
541
/* Free all connection objects of srv: first those in the free pool,
 * then those still on the srv->conns list. */
void connections_free(server *srv) {
    connections_pool_clear(srv);

    for (connection *con = srv->conns; con; con = srv->conns) {
        srv->conns = con->next;
        connection_free(con);
    }
}
551
552
/* Reset a connection's request and per-connection accounting:
 * byte checkpoints and the per-second write counter are zeroed,
 * and the connection is marked readable. */
static void connection_reset(connection *con) {
    request_st * const r = &con->request;

    request_reset(r);
    r->bytes_written_ckpt = 0;
    r->bytes_read_ckpt = 0;

    con->bytes_written_cur_second = 0;
    con->is_readable = 1;
}
561
562
563 __attribute_cold__
564 static chunk *
connection_discard_blank_line(chunkqueue * const cq,uint32_t header_len)565 connection_discard_blank_line (chunkqueue * const cq, uint32_t header_len)
566 {
567 /*(separate func only to be able to mark with compiler hint as cold)*/
568 chunkqueue_mark_written(cq, header_len);
569 return cq->first; /* refresh c after chunkqueue_mark_written() */
570 }
571
572
/* Try to obtain more request header data.
 *
 * Reads from the network if c is NULL or is the last chunk and the
 * connection is readable, then compacts the front of cq so that the
 * header data is contiguous in the first chunk.
 *
 * olen is the number of bytes the caller already examined in the first
 * chunk.  Returns the first chunk if it now holds more than olen bytes
 * past its offset; otherwise NULL.  NULL is also returned right after the
 * read if the connection is now HTTP/2. */
static chunk * connection_read_header_more(connection *con, chunkqueue *cq, chunk *c, const size_t olen) {
    /*(should not be reached by HTTP/2 streams)*/
    /*if (r->http_version == HTTP_VERSION_2) return NULL;*/
    /*(However, new connections over TLS may become HTTP/2 connections via ALPN
     * and return from this routine with r->http_version == HTTP_VERSION_2) */

    if (con->is_readable > 0 && (NULL == c || NULL == c->next)) {
        request_st * const r = &con->request;
        con->read_idle_ts = log_monotonic_secs;
        if (0 != con->network_read(con, cq, MAX_READ_LIMIT))
            connection_set_state_error(r, CON_STATE_ERROR);
        /* check if switched to HTTP/2 (ALPN "h2" during TLS negotiation) */
        if (r->http_version == HTTP_VERSION_2) return NULL;
    }

    if (0 != olen && cq->first != cq->last) {
        /* round olen up to a multiple of 16k, adding another 16k
         * when the rounding left 1k or less of headroom */
        const size_t qlen = chunkqueue_length(cq);
        size_t block = (olen + (16384-1)) & ~(16384-1);
        if (block - olen <= 1024)
            block += 16384;
        chunkqueue_compact_mem(cq, block < qlen ? block : qlen);
    }

    /* detect if data is added to chunk */
    chunk * const first = cq->first;
    if (NULL == first)
        return NULL;
    return ((size_t)first->offset + olen < buffer_clen(first->mem))
      ? first
      : NULL;
}
603
604
605 __attribute_cold__
606 static void
connection_transition_h2(request_st * const h2r,connection * const con)607 connection_transition_h2 (request_st * const h2r, connection * const con)
608 {
609 buffer_copy_string_len(&h2r->target, CONST_STR_LEN("*"));
610 buffer_copy_string_len(&h2r->target_orig, CONST_STR_LEN("*"));
611 buffer_copy_string_len(&h2r->uri.path, CONST_STR_LEN("*"));
612 h2r->http_method = HTTP_METHOD_PRI;
613 h2r->reqbody_length = -1; /*(unnecessary for h2r?)*/
614 h2r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
615
616 /* (h2r->state == CON_STATE_READ) for transition by ALPN
617 * or starting cleartext HTTP/2 with Prior Knowledge
618 * (e.g. via HTTP Alternative Services)
619 * (h2r->state == CON_STATE_REQUEST_END) for Upgrade: h2c */
620
621 if (h2r->state != CON_STATE_ERROR)
622 connection_set_state(h2r, CON_STATE_WRITE);
623
624 #if 0 /* ... if it turns out we need a separate fdevent handler for HTTP/2 */
625 con->fdn->handler = connection_handle_fdevent_h2;
626 #endif
627
628 if (NULL == con->h2) /*(not yet transitioned to HTTP/2; not Upgrade: h2c)*/
629 h2_init_con(h2r, con, NULL);
630 }
631
632
633 /**
634 * handle request header read
635 *
636 * we get called by the state-engine and by the fdevent-handler
637 */
638 __attribute_noinline__
connection_handle_read_state(connection * const con)639 static int connection_handle_read_state(connection * const con) {
640 /*(should not be reached by HTTP/2 streams)*/
641 chunkqueue * const cq = con->read_queue;
642 chunk *c = cq->first;
643 uint32_t clen = 0;
644 uint32_t header_len = 0;
645 request_st * const r = &con->request;
646 uint8_t keepalive_request_start = 0;
647 uint8_t pipelined_request_start = 0;
648 uint8_t discard_blank = 0;
649 unsigned short hoff[8192]; /* max num header lines + 3; 16k on stack */
650
651 if (con->request_count > 1) {
652 discard_blank = 1;
653 if (cq->bytes_in == r->bytes_read_ckpt) {
654 keepalive_request_start = 1;
655 if (NULL != c) { /* !chunkqueue_is_empty(cq)) */
656 pipelined_request_start = 1;
657 /* partial header of next request has already been read,
658 * so optimistically check for more data received on
659 * socket while processing the previous request */
660 con->is_readable = 1;
661 /*(if partially read next request and unable to read any bytes,
662 * then will unnecessarily scan again before subsequent read)*/
663 }
664 }
665 }
666
667 do {
668 if (NULL == c) continue;
669 clen = buffer_clen(c->mem) - c->offset;
670 if (0 == clen) continue;
671 if (__builtin_expect( (c->offset > USHRT_MAX), 0)) /*(highly unlikely)*/
672 chunkqueue_compact_mem_offset(cq);
673
674 hoff[0] = 1; /* number of lines */
675 hoff[1] = (unsigned short)c->offset; /* base offset for all lines */
676 /*hoff[2] = ...;*/ /* offset from base for 2nd line */
677
678 header_len = http_header_parse_hoff(c->mem->ptr + c->offset,clen,hoff);
679
680 /* casting to (unsigned short) might truncate, and the hoff[]
681 * addition might overflow, but max_request_field_size is USHRT_MAX,
682 * so failure will be detected below */
683 const uint32_t max_request_field_size = r->conf.max_request_field_size;
684 if ((header_len ? header_len : clen) > max_request_field_size
685 || hoff[0] >= sizeof(hoff)/sizeof(hoff[0])-1) {
686 log_error(r->conf.errh, __FILE__, __LINE__, "%s",
687 "oversized request-header -> sending Status 431");
688 r->http_status = 431; /* Request Header Fields Too Large */
689 r->keep_alive = 0;
690 return 1;
691 }
692
693 if (__builtin_expect( (0 != header_len), 1)) {
694 if (__builtin_expect( (hoff[0] > 1), 1))
695 break; /* common case; request headers complete */
696
697 if (discard_blank) { /* skip one blank line e.g. following POST */
698 if (header_len == clen) continue;
699 const int ch = c->mem->ptr[c->offset+header_len];
700 if (ch != '\r' && ch != '\n') {
701 /* discard prior blank line if next line is not blank */
702 discard_blank = 0;
703 clen = 0;/*(for connection_read_header_more() to return c)*/
704 c = connection_discard_blank_line(cq, header_len);/*cold*/
705 continue;
706 } /*(else fall through to error out in next block)*/
707 }
708 }
709
710 if (((unsigned char *)c->mem->ptr)[c->offset] < 32) {
711 /* expecting ASCII method beginning with alpha char
712 * or HTTP/2 pseudo-header beginning with ':' */
713 /*(TLS handshake begins with SYN 0x16 (decimal 22))*/
714 log_error(r->conf.errh, __FILE__, __LINE__, "%s (%s)",
715 c->mem->ptr[c->offset] == 0x16
716 ? "unexpected TLS ClientHello on clear port"
717 : "invalid request-line -> sending Status 400",
718 con->dst_addr_buf.ptr);
719 r->http_status = 400; /* Bad Request */
720 r->keep_alive = 0;
721 return 1;
722 }
723 } while ((c = connection_read_header_more(con, cq, c, clen)));
724
725 if (keepalive_request_start) {
726 if (cq->bytes_in > r->bytes_read_ckpt) {
727 /* update r->start_hp.tv_sec timestamp when first byte of
728 * next request is received on a keep-alive connection */
729 r->start_hp.tv_sec = log_epoch_secs;
730 if (r->conf.high_precision_timestamps)
731 log_clock_gettime_realtime(&r->start_hp);
732 }
733 if (pipelined_request_start && c)
734 con->read_idle_ts = log_monotonic_secs;
735 }
736
737 if (NULL == c) return 0; /* incomplete request headers */
738
739 #ifdef __COVERITY__
740 if (buffer_clen(c->mem) < hoff[1]) {
741 return 1;
742 }
743 #endif
744
745 char * const hdrs = c->mem->ptr + hoff[1];
746
747 if (con->request_count > 1) {
748 /* adjust r->bytes_read_ckpt for http_request_stats_bytes_in()
749 * (headers_len is still in cq; marked written, bytes_out incr below) */
750 r->bytes_read_ckpt = cq->bytes_out;
751 /* clear buffers which may have been kept for reporting on keep-alive,
752 * (e.g. mod_status) */
753 request_reset_ex(r);
754 }
755 /* RFC7540 3.5 HTTP/2 Connection Preface
756 * "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
757 * (Connection Preface MUST be exact match)
758 * If ALT-SVC used to advertise HTTP/2, then client might start
759 * http connection (not TLS) sending HTTP/2 connection preface.
760 * (note: intentionally checking only on initial request) */
761 else if (!con->is_ssl_sock && r->conf.h2proto
762 && hoff[0] == 2 && hoff[2] == 16
763 && hdrs[0]=='P' && hdrs[1]=='R' && hdrs[2]=='I' && hdrs[3]==' ') {
764 r->http_version = HTTP_VERSION_2;
765 return 0;
766 }
767
768 r->rqst_header_len = header_len;
769 if (r->conf.log_request_header)
770 log_error_multiline(r->conf.errh, __FILE__, __LINE__,
771 hdrs, header_len, "fd:%d rqst: ", con->fd);
772 http_request_headers_process(r, hdrs, hoff, con->proto_default_port);
773 chunkqueue_mark_written(cq, r->rqst_header_len);
774
775 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)
776 && 0 == r->http_status
777 && h2_check_con_upgrade_h2c(r)) {
778 /*(Upgrade: h2c over cleartext does not have SNI; no COMP_HTTP_HOST)*/
779 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
780 | (1 << COMP_HTTP_REMOTE_IP);
781 r->bytes_read_ckpt = 0;
782 /*connection_handle_write(r, con);*//* defer write to network */
783 return 0;
784 }
785
786 return 1;
787 }
788
789
connection_handle_fdevent(void * const context,const int revents)790 static handler_t connection_handle_fdevent(void * const context, const int revents) {
791 connection * restrict con = context;
792 const int is_ssl_sock = con->is_ssl_sock;
793
794 joblist_append(con);
795
796 if (revents & ~(FDEVENT_IN | FDEVENT_OUT))
797 con->revents_err |= (revents & ~(FDEVENT_IN | FDEVENT_OUT));
798
799 if (revents & (FDEVENT_IN | FDEVENT_OUT)) {
800 if (is_ssl_sock) /*(ssl may read and write for both reads and writes)*/
801 con->is_readable = con->is_writable = 1;
802 else {
803 if (revents & FDEVENT_IN)
804 con->is_readable = 1;
805 if (revents & FDEVENT_OUT)
806 con->is_writable = 1;
807 }
808 }
809
810 return HANDLER_FINISHED;
811 }
812
813
814 __attribute_cold__
connection_read_cq_err(connection * con)815 static int connection_read_cq_err(connection *con) {
816 request_st * const r = &con->request;
817 #if defined(__WIN32)
818 int lastError = WSAGetLastError();
819 switch (lastError) {
820 case EAGAIN:
821 return 0;
822 case EINTR:
823 /* we have been interrupted before we could read */
824 con->is_readable = 1;
825 return 0;
826 case ECONNRESET:
827 /* suppress logging for this error, expected for keep-alive */
828 break;
829 default:
830 log_error(r->conf.errh, __FILE__, __LINE__,
831 "connection closed - recv failed: %d", lastError);
832 break;
833 }
834 #else /* __WIN32 */
835 switch (errno) {
836 case EAGAIN:
837 return 0;
838 case EINTR:
839 /* we have been interrupted before we could read */
840 con->is_readable = 1;
841 return 0;
842 case ECONNRESET:
843 /* suppress logging for this error, expected for keep-alive */
844 break;
845 default:
846 log_perror(r->conf.errh, __FILE__, __LINE__,
847 "connection closed - read failed");
848 break;
849 }
850 #endif /* __WIN32 */
851
852 connection_set_state_error(r, CON_STATE_ERROR);
853 return -1;
854 }
855
856
857 /* 0: everything ok, -1: error, -2: con closed */
connection_read_cq(connection * con,chunkqueue * cq,off_t max_bytes)858 static int connection_read_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
859 ssize_t len;
860 size_t mem_len = 0;
861
862 do {
863 /* obtain chunk memory into which to read
864 * fill previous chunk if it has a reasonable amount of space available
865 * (use mem_len=0 to obtain large buffer at least half of chunk_buf_sz)
866 */
867 chunk *ckpt = cq->last;
868 char * const mem = chunkqueue_get_memory(cq, &mem_len);
869 if (mem_len > (size_t)max_bytes) mem_len = (size_t)max_bytes;
870
871 #if defined(__WIN32)
872 len = recv(con->fd, mem, mem_len, 0);
873 #else
874 len = read(con->fd, mem, mem_len);
875 #endif
876
877 chunkqueue_use_memory(cq, ckpt, len > 0 ? len : 0);
878
879 if (len != (ssize_t)mem_len) {
880 /* we got less than expected, wait for the next fd-event */
881 con->is_readable = 0;
882 return len > 0 ? 0 : 0 == len ? -2 : connection_read_cq_err(con);
883 }
884
885 max_bytes -= len;
886
887 int frd;
888 mem_len = (0 == fdevent_ioctl_fionread(con->fd, S_IFSOCK, &frd))
889 ? (frd < max_bytes) ? (size_t)frd : (size_t)max_bytes
890 : 0;
891 } while (max_bytes);
892 return 0;
893 }
894
895
/* Default network_write callback: forward to the server's configured
 * network backend write function, passing the request's error handle.
 * Returns whatever the backend returns. */
static int connection_write_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
    return con->srv->network_backend_write(con->fd, cq, max_bytes,
                                           con->request.conf.errh);
}
900
901
902 static handler_t connection_handle_read_post_state(request_st * const r);
903
connection_accepted(server * srv,const server_socket * srv_socket,sock_addr * cnt_addr,int cnt)904 connection *connection_accepted(server *srv, const server_socket *srv_socket, sock_addr *cnt_addr, int cnt) {
905 connection *con;
906
907 srv->cur_fds++;
908
909 /* ok, we have the connection, register it */
910 #if 0
911 log_error(srv->errh, __FILE__, __LINE__, "accepted() %d", cnt);
912 #endif
913
914 con = connections_get_new_connection(srv);
915
916 con->fd = cnt;
917 con->fdn = fdevent_register(srv->ev, con->fd, connection_handle_fdevent, con);
918 con->network_read = connection_read_cq;
919 con->network_write = connection_write_cq;
920 con->reqbody_read = connection_handle_read_post_state;
921
922 request_st * const r = &con->request;
923 connection_set_state(r, CON_STATE_REQUEST_START);
924
925 con->connection_start = log_monotonic_secs;
926 con->dst_addr = *cnt_addr;
927 sock_addr_cache_inet_ntop_copy_buffer(&con->dst_addr_buf,
928 &con->dst_addr);
929 con->srv_socket = srv_socket;
930 con->is_ssl_sock = srv_socket->is_ssl;
931 con->proto_default_port = 80; /* "http" */
932
933 config_cond_cache_reset(r);
934 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
935 | (1 << COMP_HTTP_REMOTE_IP);
936
937 if (HANDLER_GO_ON != plugins_call_handle_connection_accept(con)) {
938 connection_reset(con);
939 connection_close(con);
940 return NULL;
941 }
942 if (r->http_status < 0) connection_set_state(r, CON_STATE_WRITE);
943 return con;
944 }
945
946
947 __attribute_cold__
948 __attribute_noinline__
__attribute_nonnull__()949 __attribute_nonnull__()
950 static void
951 connection_log_state (const request_st * const r, const char * const tag)
952 {
953 buffer * const tb = r->tmp_buf;
954 buffer_clear(tb);
955 http_request_state_append(tb, r->state);
956 log_error(r->conf.errh, __FILE__, __LINE__,
957 "fd:%d id:%d state:%s%s", r->con->fd, r->h2id, tb->ptr, tag);
958 }
959
960
961 static void connection_state_machine_h2 (connection * const con);
962
963
/* Run request r through the request state machine.
 *
 * Each pass records the state on entry (ostate), handles it, and usually
 * falls through into the following state(s).  The loop repeats while the
 * state at the end of a pass differs from ostate.  The function returns
 * early when the response handler asks to wait for an event, when the
 * write state reports that it is still CON_STATE_WRITE, when the connection
 * is handed over to the HTTP/2 state machine, or when an HTTP/2 stream
 * (r != &con->request) reaches its end/error state. */
static void
connection_state_machine_loop (request_st * const r, connection * const con)
{
    request_state_t ostate;
    do {
        if (r->conf.log_state_handling)
            connection_log_state(r, "");

        switch ((ostate = r->state)) {
        case CON_STATE_REQUEST_START: /* transient */
            /*(should not be reached by HTTP/2 streams)*/
            /* stamp request start time and reset per-request counters */
            r->start_hp.tv_sec = log_epoch_secs;
            con->read_idle_ts = log_monotonic_secs;
            if (r->conf.high_precision_timestamps)
                log_clock_gettime_realtime(&r->start_hp);

            con->request_count++;
            r->loops_per_request = 0;

            connection_set_state(r, CON_STATE_READ);
            __attribute_fallthrough__
        case CON_STATE_READ:
            /*(should not be reached by HTTP/2 streams)*/
            if (!connection_handle_read_state(con)) {
                /* zero result: either switch to HTTP/2 processing or
                 * leave the switch and re-evaluate the loop condition */
                if (r->http_version == HTTP_VERSION_2) {
                    connection_transition_h2(r, con);
                    connection_state_machine_h2(con);
                    return;
                }
                break;
            }
            /*connection_set_state(r, CON_STATE_REQUEST_END);*/
            __attribute_fallthrough__
        case CON_STATE_REQUEST_END: /* transient */
            /* no request body expected: go straight to handling */
            connection_set_state(r,
              (0 == r->reqbody_length)
              ? CON_STATE_HANDLE_REQUEST
              : CON_STATE_READ_POST);
            __attribute_fallthrough__
        case CON_STATE_READ_POST:
        case CON_STATE_HANDLE_REQUEST:
            switch (http_response_handler(r)) {
              case HANDLER_GO_ON:/*CON_STATE_RESPONSE_START occurred;transient*/
              case HANDLER_FINISHED:
                break;
              case HANDLER_WAIT_FOR_EVENT:
                return;
              /*case HANDLER_COMEBACK:*//*(not expected)*/
              /*case HANDLER_ERROR:*/
              default:
                connection_set_state_error(r, CON_STATE_ERROR);
                /* 'continue' targets the enclosing do-while: the loop
                 * condition is evaluated next and, as ostate differs from
                 * CON_STATE_ERROR here, another pass handles the error */
                continue;
            }
            /*__attribute_fallthrough__*/
        /*case CON_STATE_RESPONSE_START:*//*occurred;transient*/
            /* emit response headers using the framing for this protocol */
            if (r->http_version > HTTP_VERSION_1_1)
                h2_send_headers(r, con);
            else
                http_response_write_header(r);
            connection_set_state(r, CON_STATE_WRITE);
            __attribute_fallthrough__
        case CON_STATE_WRITE:
            /* still CON_STATE_WRITE afterwards: nothing more to do now */
            if (connection_handle_write_state(r, con) == CON_STATE_WRITE)
                return;
            __attribute_fallthrough__
        case CON_STATE_RESPONSE_END: /* transient */
        case CON_STATE_ERROR:        /* transient */
            /* HTTP/2 streams (any r other than the connection-level request)
             * are not finished here; return and leave them to the caller */
            if (r->http_version > HTTP_VERSION_1_1 && r != &con->request)
                return;
            connection_handle_response_end_state(r, con);
            /*(make sure ostate will not match r->state)*/
            ostate = CON_STATE_RESPONSE_END;/* != r->state */
            break;
        case CON_STATE_CLOSE:
            /*(should not be reached by HTTP/2 streams)*/
            connection_handle_close_state(con);
            break;
        case CON_STATE_CONNECT:
            break;
        default:/*(should not happen)*/
            /*connection_log_state(r, "");*/ /*(unknown state)*/
            break;
        }
    } while (ostate != (request_state_t)r->state);
}
1049
1050
1051 __attribute_cold__
1052 static void
connection_revents_err(request_st * const r,connection * const con)1053 connection_revents_err (request_st * const r, connection * const con)
1054 {
1055 /* defer handling FDEVENT_HUP and FDEVENT_ERR to here in order to
1056 * first attempt (in callers) to read data in kernel socket buffers */
1057 /*assert(con->revents_err & ~(FDEVENT_IN | FDEVENT_OUT));*/
1058 const int revents = (int)con->revents_err;
1059 con->revents_err = 0;
1060
1061 if (r->state == CON_STATE_CLOSE)
1062 con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
1063 else if (revents & FDEVENT_HUP)
1064 connection_set_state_error(r, CON_STATE_ERROR);
1065 else if (revents & FDEVENT_RDHUP) {
1066 int events = fdevent_fdnode_interest(con->fdn);
1067 events &= ~(FDEVENT_IN|FDEVENT_RDHUP);
1068 r->conf.stream_request_body &=
1069 ~(FDEVENT_STREAM_REQUEST_BUFMIN|FDEVENT_STREAM_REQUEST_POLLIN);
1070 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLRDHUP;
1071 con->is_readable = 1; /*(can read 0 for end-of-stream)*/
1072 if (chunkqueue_is_empty(con->read_queue)) r->keep_alive = 0;
1073 if (r->reqbody_length < -1)/*(transparent proxy mode; no more rd data)*/
1074 r->reqbody_length = r->reqbody_queue.bytes_in;
1075 if (sock_addr_get_family(&con->dst_addr) == AF_UNIX) {
1076 /* future: will getpeername() on AF_UNIX check if still connected?*/
1077 fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1078 }
1079 else if (fdevent_is_tcp_half_closed(con->fd)) {
1080 /* Success of fdevent_is_tcp_half_closed() after FDEVENT_RDHUP
1081 * indicates TCP FIN received, but does not distinguish between
1082 * client shutdown(fd, SHUT_WR) and client close(fd). Remove
1083 * FDEVENT_RDHUP so that we do not spin on ready event. However,
1084 * a later TCP RST will not be detected until next write to socket.
1085 * future: might getpeername() to check for TCP RST on half-closed
1086 * sockets (without FDEVENT_RDHUP interest) when checking for write
1087 * timeouts once a second in server.c, though getpeername() on
1088 * Windows might not indicate this */
1089 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_TCP_FIN;
1090 fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1091 }
1092 else {
1093 /* Failure of fdevent_is_tcp_half_closed() indicates TCP RST
1094 * (or unable to tell (unsupported OS), though should not
1095 * be setting FDEVENT_RDHUP in that case) */
1096 connection_set_state_error(r, CON_STATE_ERROR);
1097 }
1098 }
1099 else if (revents & FDEVENT_ERR) /* error, connection reset */
1100 connection_set_state_error(r, CON_STATE_ERROR);
1101 else
1102 log_error(r->conf.errh, __FILE__, __LINE__,
1103 "connection closed: poll() -> ??? %d", revents);
1104 }
1105
1106
1107 static void
connection_set_fdevent_interest(request_st * const r,connection * const con)1108 connection_set_fdevent_interest (request_st * const r, connection * const con)
1109 {
1110 if (con->fd < 0) return;
1111
1112 if (con->revents_err && r->state != CON_STATE_ERROR) {
1113 connection_revents_err(r, con); /* resets con->revents_err = 0 */
1114 connection_state_machine(con);
1115 return;
1116 /* connection_state_machine() will end up calling back into
1117 * connection_set_fdevent_interest(), but with 0 == con->revents_err */
1118 }
1119
1120 int n = 0;
1121 switch(r->state) {
1122 case CON_STATE_READ:
1123 n = FDEVENT_IN;
1124 if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1125 n |= FDEVENT_RDHUP;
1126 break;
1127 case CON_STATE_WRITE:
1128 if (!chunkqueue_is_empty(con->write_queue)
1129 && 0 == con->is_writable && 0 == con->traffic_limit_reached)
1130 n |= FDEVENT_OUT;
1131 __attribute_fallthrough__
1132 case CON_STATE_READ_POST:
1133 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLIN)
1134 n |= FDEVENT_IN;
1135 if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1136 n |= FDEVENT_RDHUP;
1137 break;
1138 case CON_STATE_CLOSE:
1139 n = FDEVENT_IN;
1140 break;
1141 case CON_STATE_CONNECT:
1142 return;
1143 default:
1144 break;
1145 }
1146
1147 const int events = fdevent_fdnode_interest(con->fdn);
1148 if (con->is_readable < 0) {
1149 con->is_readable = 0;
1150 n |= FDEVENT_IN;
1151 }
1152 if (con->is_writable < 0) {
1153 con->is_writable = 0;
1154 n |= FDEVENT_OUT;
1155 }
1156 if (events & FDEVENT_RDHUP)
1157 n |= FDEVENT_RDHUP;
1158
1159 if (n == events) return;
1160
1161 /* update timestamps when enabling interest in events */
1162 if ((n & FDEVENT_IN) && !(events & FDEVENT_IN))
1163 con->read_idle_ts = log_monotonic_secs;
1164 if ((n & FDEVENT_OUT) && !(events & FDEVENT_OUT))
1165 con->write_request_ts = log_monotonic_secs;
1166 fdevent_fdnode_event_set(con->srv->ev, con->fdn, n);
1167 }
1168
1169
1170 __attribute_cold__
1171 static void
connection_request_end_h2(request_st * const h2r,connection * const con)1172 connection_request_end_h2 (request_st * const h2r, connection * const con)
1173 {
1174 if (h2r->keep_alive >= 0) {
1175 h2r->keep_alive = -1;
1176 h2_send_goaway(con, H2_E_NO_ERROR);
1177 http_response_delay(con);
1178 }
1179 else /*(abort connection upon second request to close h2 connection)*/
1180 h2_send_goaway(con, H2_E_ENHANCE_YOUR_CALM);
1181 }
1182
1183
/* State machine for an HTTP/2 connection.
 *
 * Steps, in order:
 *  1. parse buffered frames and, if the socket is readable, read and parse
 *     more (skipped once a GOAWAY error has been recorded);
 *  2. run the per-request state machine for every active stream, stage
 *     response DATA into con->write_queue within a byte budget, and retire
 *     streams that reached their end/error state;
 *  3. if a GOAWAY error is set, retire all remaining streams;
 *  4. flush con->write_queue to the socket and decide whether the
 *     connection should be rescheduled, keep polling for input, or proceed
 *     to its end state;
 *  5. update fd event interest. */
static void
connection_state_machine_h2 (connection * const con)
{
    h2con * const h2c = con->h2;
    request_st * const h2r = &con->request; /* connection-level request */

    if (h2c->sent_goaway <= 0
        && (chunkqueue_is_empty(con->read_queue) || h2_parse_frames(con))
        && con->is_readable > 0) {
        chunkqueue * const cq = con->read_queue;
        const off_t mark = cq->bytes_in;
        if (0 == con->network_read(con, cq, MAX_READ_LIMIT)) {
            /* parse only if the read actually added bytes */
            if (mark < cq->bytes_in)
                h2_parse_frames(con);
        }
        else {
            /* network error; do not send GOAWAY, but pretend that we did */
            h2c->sent_goaway = H2_E_CONNECT_ERROR; /*any error (not NO_ERROR)*/
            connection_set_state_error(h2r, CON_STATE_ERROR);
        }
    }

    /* process requests on HTTP/2 streams */
    int resched = 0; /* bit 1: more to write; bit 2: deferred frames to read */
    if (h2c->sent_goaway <= 0 && h2c->rused) {
        /* coarse check for write throttling
         * (connection.kbytes-per-second, server.kbytes-per-second)
         * obtain an approximate limit, not refreshed per request_st,
         * even though we are not calculating response HEADERS frames
         * or frame overhead here */
        off_t max_bytes = con->is_writable > 0
          ? connection_write_throttle(con, MAX_WRITE_LIMIT)
          : 0;
        /* reduce the budget by what is already staged for sending */
        const off_t cqlen = chunkqueue_length(con->write_queue);
        if (cqlen > 8192 && max_bytes > 65536) max_bytes = 65536;
        max_bytes -= cqlen;
        if (max_bytes < 0) max_bytes = 0;

        /* XXX: to avoid buffer bloat due to staging too much data in
         * con->write_queue, consider setting limit on how much is staged
         * for sending on con->write_queue: adjusting max_bytes down */

        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /* future: might track read/write interest per request
             * to avoid iterating through all active requests */

            connection_state_machine_loop(r, con);

          #if 0
            if (r->conf.log_state_handling)
                connection_log_state(r, " at loop exit");
          #endif

            if (r->state < CON_STATE_WRITE)
                continue;
            /* else CON_STATE_WRITE, CON_STATE_RESPONSE_END, CON_STATE_ERROR */
            else if (r->state == CON_STATE_WRITE) {
                /* stage response body only when there is data, budget, and
                 * the body is complete or being streamed */
                if (__builtin_expect((!chunkqueue_is_empty(&r->write_queue)), 1)
                    && max_bytes
                    && (r->resp_body_finished
                        || (r->conf.stream_response_body
                            & (FDEVENT_STREAM_RESPONSE
                              |FDEVENT_STREAM_RESPONSE_BUFMIN)))) {
                    /*(subtract 9 byte HTTP/2 frame overhead from each 16k DATA
                     * frame for more efficient sending of large files)*/
                    /*(use smaller max per stream if marked 'incremental' (w/ 0)
                     * to give more streams a chance to send in parallel)*/
                    uint32_t dlen = (r->h2_prio & 1) ? 32768-18 : 8192;
                    if (dlen > (uint32_t)max_bytes) dlen = (uint32_t)max_bytes;
                    dlen = h2_send_cqdata(r, con, &r->write_queue, dlen);
                    if (dlen) { /*(do not resched (spin) if swin empty window)*/
                        max_bytes -= (off_t)dlen;
                        if (!chunkqueue_is_empty(&r->write_queue))
                            resched |= 1;
                    }
                }
                /* stream stays open while data remains or body unfinished */
                if (!chunkqueue_is_empty(&r->write_queue)
                    || !r->resp_body_finished)
                    continue;

                connection_set_state(r, CON_STATE_RESPONSE_END);
                if (__builtin_expect( (r->conf.log_state_handling), 0))
                    connection_log_state(r, "");
            }

            {/*(r->state==CON_STATE_RESPONSE_END || r->state==CON_STATE_ERROR)*/
                /*(trigger reschedule of con if frames pending)*/
                /* i.e. the stream table was full and unread input remains */
                if (h2c->rused == sizeof(h2c->r)/sizeof(*h2c->r)
                    && !chunkqueue_is_empty(con->read_queue))
                    resched |= 2;
                h2_send_end_stream(r, con);
                /* save keep_alive first: r must not be used after retiring */
                const int alive = r->keep_alive;
                h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
                --i;/* adjust loop i; h2c->rused was modified to retire r */
                /*(special-case: allow *stream* to set r->keep_alive = -1 to
                 * trigger goaway on h2 connection, e.g. after mod_auth failure
                 * in attempt to mitigate brute force attacks by forcing a
                 * reconnect and (somewhat) slowing down retries)*/
                if (alive < 0)
                    connection_request_end_h2(h2r, con);
            }
        }

        /* budget exhausted: more data may be waiting to be staged */
        if (0 == max_bytes) resched |= 1;
    }

    if (h2c->sent_goaway > 0 && h2c->rused) {
        /* retire streams if an error has occurred
         * note: this is not done to other streams in the loop above
         * (besides the current stream in the loop) due to the specific
         * implementation above, where doing so would mess up the iterator */
        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /*assert(r->h2state == H2_STATE_CLOSED);*/
            h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
            --i;/* adjust loop i; h2c->rused was modified to retire r */
        }
        /* XXX: ? should we discard con->write_queue
         *        and change h2r->state to CON_STATE_RESPONSE_END ? */
    }

    if (h2r->state == CON_STATE_WRITE) {
        /* write HTTP/2 frames to socket */
        if (!chunkqueue_is_empty(con->write_queue))
            connection_handle_write(h2r, con);

        /* everything flushed, no streams left, GOAWAY recorded: finish */
        if (chunkqueue_is_empty(con->write_queue)
            && 0 == h2c->rused && h2c->sent_goaway)
            connection_set_state(h2r, CON_STATE_RESPONSE_END);
    }

    /* state is re-tested: the block above may have changed it */
    if (h2r->state == CON_STATE_WRITE) {
        /* (resched & 1) more data is available to write, if still able to write
         * (resched & 2) resched to read deferred frames from con->read_queue */
        /*(con->is_writable set to 0 if !chunkqueue_is_empty(con->write_queue)
         * after trying to write in connection_handle_write() above)*/
        if (((resched & 1) && con->is_writable>0 && !con->traffic_limit_reached)
            || (resched & 2))
            joblist_append(con);

        /* poll for input only while the HTTP/2 layer wants to read */
        if (h2_want_read(con))
            h2r->conf.stream_request_body |=  FDEVENT_STREAM_REQUEST_POLLIN;
        else
            h2r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
    }
    else /* e.g. CON_STATE_RESPONSE_END or CON_STATE_ERROR */
        connection_state_machine_loop(h2r, con);

    connection_set_fdevent_interest(h2r, con);
}
1335
1336
1337 static void
connection_state_machine_h1(connection * const con)1338 connection_state_machine_h1 (connection * const con)
1339 {
1340 request_st * const r = &con->request;
1341 connection_state_machine_loop(r, con);
1342
1343 if (r->conf.log_state_handling)
1344 connection_log_state(r, " at loop exit");
1345
1346 connection_set_fdevent_interest(r, con);
1347 }
1348
1349
1350 void
connection_state_machine(connection * const con)1351 connection_state_machine (connection * const con)
1352 {
1353 if (con->h2)
1354 connection_state_machine_h2(con);
1355 else /* if (r->http_version <= HTTP_VERSION_1_1) */
1356 connection_state_machine_h1(con);
1357 }
1358
1359
/* Check one connection for expired timeouts at time cur_ts and for release
 * of the traffic limit; run the connection state machine if anything
 * changed.
 *
 * Timeouts checked: linger timeout in CON_STATE_CLOSE; per-stream read and
 * write idle timeouts plus the keep-alive timeout for HTTP/2 connections;
 * read idle / keep-alive timeouts while waiting for input; and the write
 * idle timeout for HTTP/1.x responses.  Also resets
 * con->bytes_written_cur_second to 0 on every call. */
static void connection_check_timeout (connection * const con, const unix_time64_t cur_ts) {
    const int waitevents = fdevent_fdnode_interest(con->fdn);
    int changed = 0; /* set when the state machine must be run below */
    int t_diff;      /* seconds since connection start, never 0 */

    request_st * const r = &con->request;
    if (r->state == CON_STATE_CLOSE) {
        if (cur_ts - con->close_timeout_ts > HTTP_LINGER_TIMEOUT) {
            changed = 1;
        }
    }
    else if (con->h2 && r->state == CON_STATE_WRITE) {
        h2con * const h2c = con->h2;
        if (h2c->rused) {
            /* active streams: apply read/write idle timeouts per stream.
             * Note that a timeout sets the error state on the
             * connection-level request r, not on the stream rr. */
            for (uint32_t i = 0; i < h2c->rused; ++i) {
                request_st * const rr = h2c->r[i];
                if (rr->state == CON_STATE_ERROR) { /*(should not happen)*/
                    changed = 1;
                    continue;
                }
                /* request body not yet fully received */
                if (rr->reqbody_length != rr->reqbody_queue.bytes_in) {
                    /* XXX: should timeout apply if not trying to read on h2con?
                     * (still applying timeout to catch stuck connections) */
                    /* XXX: con->read_idle_ts is not per-request, so timeout
                     * will not occur if other read activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    if (cur_ts - con->read_idle_ts > rr->conf.max_read_idle) {
                        /* time - out */
                        if (rr->conf.log_request_handling) {
                            log_error(rr->conf.errh, __FILE__, __LINE__,
                              "request aborted - read timeout: %d", con->fd);
                        }
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }

                if (rr->state != CON_STATE_READ_POST
                    && con->write_request_ts != 0) {
                    /* XXX: con->write_request_ts is not per-request, so timeout
                     * will not occur if other write activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    /* NOTE(review): the limit and the log fields below are
                     * taken from the connection-level request r rather than
                     * the stream rr -- confirm that this is intended */
                    if (cur_ts - con->write_request_ts
                        > r->conf.max_write_idle) {
                        /*(see comment further down about max_write_idle)*/
                        /* time - out */
                        if (r->conf.log_timeouts) {
                            log_error(r->conf.errh, __FILE__, __LINE__,
                              "NOTE: a request from %s for %.*s timed out "
                              "after writing %lld bytes. We waited %d seconds. "
                              "If this is a problem, increase "
                              "server.max-write-idle",
                              r->dst_addr_buf->ptr,
                              BUFFER_INTLEN_PTR(&r->target),
                              (long long)r->write_queue.bytes_out,
                              (int)r->conf.max_write_idle);
                        }
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }
            }
        }
        else {
            /* no active streams: apply the keep-alive idle timeout */
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - keep-alive timeout: %d",
                              con->fd);
                }
                connection_set_state(r, CON_STATE_RESPONSE_END);
                changed = 1;
            }
        }
        /* process changes before optimistic read of additional HTTP/2 frames */
        if (changed)
            con->is_readable = 0;
    }
    else if (waitevents & FDEVENT_IN) {
        /* first request on the connection, or any state other than READ,
         * uses max_read_idle; otherwise the keep-alive idle timeout */
        if (con->request_count == 1 || r->state != CON_STATE_READ) {
            /* e.g. CON_STATE_READ_POST || CON_STATE_WRITE */
            if (cur_ts - con->read_idle_ts > r->conf.max_read_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - read timeout: %d", con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        } else {
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - keep-alive timeout: %d",
                              con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        }
    }

    /* max_write_idle timeout currently functions as backend timeout,
     * too, after response has been started.
     * Although backend timeouts now exist, there is no default for timeouts
     * to backends, so were this client timeout now to be changed to check
     * for write interest to the client, then timeout would not occur if the
     * backend hung and there was no backend read timeout set.  Therefore,
     * max_write_idle timeout remains timeout for both reading from backend
     * and writing to client, though this check here is only for HTTP/1.1.
     * In the future, if there were a quick way to detect that a backend
     * read timeout was in effect, then this timeout could check for write
     * interest to client.  (not a priority) */
    /*if (waitevents & FDEVENT_OUT)*/
    if (r->http_version <= HTTP_VERSION_1_1
        && r->state == CON_STATE_WRITE && con->write_request_ts != 0) {
      #if 0
        if (cur_ts - con->write_request_ts > 60) {
            log_error(r->conf.errh, __FILE__, __LINE__,
                      "connection closed - pre-write-request-timeout: %d %d",
                      con->fd, cur_ts - con->write_request_ts);
        }
      #endif

        if (cur_ts - con->write_request_ts > r->conf.max_write_idle) {
            /* time - out */
            if (r->conf.log_timeouts) {
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "NOTE: a request from %s for %.*s timed out after writing "
                  "%lld bytes. We waited %d seconds. If this is a problem, "
                  "increase server.max-write-idle",
                  r->dst_addr_buf->ptr,
                  BUFFER_INTLEN_PTR(&r->target),
                  (long long)con->write_queue->bytes_out,
                  (int)r->conf.max_write_idle);
            }
            connection_set_state_error(r, CON_STATE_ERROR);
            changed = 1;
        }
    }

    /* lighttpd HTTP/2 limitation: rate limit config r->conf.bytes_per_second
     * (currently) taken only from top-level config (socket), with host if SNI
     * used, but not any other config conditions, e.g. not per-file-type */

    /* elapsed connection time, forced to at least 1
     * NOTE(review): the time difference is narrowed to int here */
    if (0 == (t_diff = cur_ts - con->connection_start)) t_diff = 1;

    /* lift the traffic limit when rate limiting is off, or when the bytes
     * written so far are below bytes_per_second * elapsed seconds */
    if (con->traffic_limit_reached &&
        (r->conf.bytes_per_second == 0 ||
         con->write_queue->bytes_out
           < (off_t)r->conf.bytes_per_second * t_diff)) {
        /* enable connection again */
        con->traffic_limit_reached = 0;

        changed = 1;
    }

    con->bytes_written_cur_second = 0;

    if (changed) {
        connection_state_machine(con);
    }
}
1528
/* Periodic maintenance: check every connection on srv->conns for timeouts
 * at time cur_ts. */
void connection_periodic_maint (server * const srv, const unix_time64_t cur_ts) {
    connection *con = srv->conns;
    while (con) {
        /* fetch the successor before the check; presumably the check can
         * end with con being unlinked from the list -- verify against
         * connection_del() */
        connection * const successor = con->next;
        connection_check_timeout(con, cur_ts);
        con = successor;
    }
}
1536
/* Maintenance pass run during graceful shutdown.
 *
 * For every connection: shorten lingering closes, send GOAWAY on HTTP/2
 * connections, close keep-alive connections idling between requests, force
 * an error state once the graceful-expire deadline has passed, and disable
 * keep-alive and rate limits.  The state machine is run for connections
 * whose situation changed. */
void connection_graceful_shutdown_maint (server *srv) {
    const int deadline_passed =
      (srv->graceful_expire_ts && srv->graceful_expire_ts < log_monotonic_secs);

    connection *successor;
    for (connection *con = srv->conns; con; con = successor) {
        successor = con->next; /* saved before con is processed */
        request_st * const r = &con->request;
        int run_state_machine = 0;

        if (r->state == CON_STATE_CLOSE) {
            /* reduce remaining linger timeout to be
             * (from zero) *up to* one more second, but no more */
            if (HTTP_LINGER_TIMEOUT > 1)
                con->close_timeout_ts -= (HTTP_LINGER_TIMEOUT - 1);
            if (log_monotonic_secs - con->close_timeout_ts > HTTP_LINGER_TIMEOUT)
                run_state_machine = 1;
        }
        else if (con->h2 && r->state == CON_STATE_WRITE) {
            h2_send_goaway(con, H2_E_NO_ERROR);
            /* nothing in flight: the connection can end right away */
            if (0 == con->h2->rused && chunkqueue_is_empty(con->write_queue)) {
                connection_set_state(r, CON_STATE_RESPONSE_END);
                run_state_machine = 1;
            }
        }
        else if (r->state == CON_STATE_READ && con->request_count > 1
                 && chunkqueue_is_empty(con->read_queue)) {
            /* close connections in keep-alive waiting for next request */
            connection_set_state_error(r, CON_STATE_ERROR);
            run_state_machine = 1;
        }

        if (deadline_passed) {
            connection_set_state_error(r, CON_STATE_ERROR);
            run_state_machine = 1;
        }

        /* disable keep-alive and both rate limits */
        r->keep_alive = 0;
        r->conf.bytes_per_second = 0;
        r->conf.global_bytes_per_second = 0;
        if (con->traffic_limit_reached) {
            con->traffic_limit_reached = 0;
            run_state_machine = 1;
        }

        if (run_state_machine)
            connection_state_machine(con);
    }
}
1586
1587
1588 static int
connection_handle_read_post_cq_compact(chunkqueue * const cq)1589 connection_handle_read_post_cq_compact (chunkqueue * const cq)
1590 {
1591 /* combine first mem chunk with next non-empty mem chunk
1592 * (loop if next chunk is empty) */
1593 chunk *c = cq->first;
1594 if (NULL == c) return 0;
1595 const uint32_t mlen = buffer_clen(c->mem) - (size_t)c->offset;
1596 while ((c = c->next)) {
1597 const uint32_t blen = buffer_clen(c->mem) - (size_t)c->offset;
1598 if (0 == blen) continue;
1599 chunkqueue_compact_mem(cq, mlen + blen);
1600 return 1;
1601 }
1602 return 0;
1603 }
1604
1605
1606 __attribute_pure__
1607 static int
connection_handle_read_post_chunked_crlf(chunkqueue * const cq)1608 connection_handle_read_post_chunked_crlf (chunkqueue * const cq)
1609 {
1610 /* caller might check chunkqueue_length(cq) >= 2 before calling here
1611 * to limit return value to either 1 for good or -1 for error */
1612 chunk *c;
1613 buffer *b;
1614 char *p;
1615 size_t len;
1616
1617 /* caller must have called chunkqueue_remove_finished_chunks(cq), so if
1618 * chunkqueue is not empty, it contains chunk with at least one char */
1619 if (chunkqueue_is_empty(cq)) return 0;
1620
1621 c = cq->first;
1622 b = c->mem;
1623 p = b->ptr+c->offset;
1624 if (p[0] != '\r') return -1; /* error */
1625 if (p[1] == '\n') return 1;
1626 len = buffer_clen(b) - (size_t)c->offset;
1627 if (1 != len) return -1; /* error */
1628
1629 while (NULL != (c = c->next)) {
1630 b = c->mem;
1631 len = buffer_clen(b) - (size_t)c->offset;
1632 if (0 == len) continue;
1633 p = b->ptr+c->offset;
1634 return (p[0] == '\n') ? 1 : -1; /* error if not '\n' */
1635 }
1636 return 0;
1637 }
1638
1639
1640 static handler_t
connection_handle_read_post_chunked(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1641 connection_handle_read_post_chunked (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1642 {
1643 /* r->conf.max_request_size is in kBytes */
1644 const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1645 off_t te_chunked = r->te_chunked;
1646 do {
1647 off_t len = chunkqueue_length(cq);
1648
1649 while (0 == te_chunked) {
1650 char *p;
1651 chunk *c = cq->first;
1652 if (NULL == c) break;
1653 force_assert(c->type == MEM_CHUNK);
1654 p = strchr(c->mem->ptr+c->offset, '\n');
1655 if (NULL != p) { /* found HTTP chunked header line */
1656 off_t hsz = p + 1 - (c->mem->ptr+c->offset);
1657 unsigned char *s = (unsigned char *)c->mem->ptr+c->offset;
1658 for (unsigned char u;(u=(unsigned char)hex2int(*s))!=0xFF;++s) {
1659 if (te_chunked > (off_t)(1uLL<<(8*sizeof(off_t)-5))-1-2) {
1660 log_error(r->conf.errh, __FILE__, __LINE__,
1661 "chunked data size too large -> 400");
1662 /* 400 Bad Request */
1663 return http_response_reqbody_read_error(r, 400);
1664 }
1665 te_chunked <<= 4;
1666 te_chunked |= u;
1667 }
1668 if (s == (unsigned char *)c->mem->ptr+c->offset) { /*(no hex)*/
1669 log_error(r->conf.errh, __FILE__, __LINE__,
1670 "chunked header invalid chars -> 400");
1671 /* 400 Bad Request */
1672 return http_response_reqbody_read_error(r, 400);
1673 }
1674 while (*s == ' ' || *s == '\t') ++s;
1675 if (*s != '\r' && *s != ';') {
1676 log_error(r->conf.errh, __FILE__, __LINE__,
1677 "chunked header invalid chars -> 400");
1678 /* 400 Bad Request */
1679 return http_response_reqbody_read_error(r, 400);
1680 }
1681
1682 if (hsz >= 1024) {
1683 /* prevent theoretical integer overflow
1684 * casting to (size_t) and adding 2 (for "\r\n") */
1685 log_error(r->conf.errh, __FILE__, __LINE__,
1686 "chunked header line too long -> 400");
1687 /* 400 Bad Request */
1688 return http_response_reqbody_read_error(r, 400);
1689 }
1690
1691 if (0 == te_chunked) {
1692 /* do not consume final chunked header until
1693 * (optional) trailers received along with
1694 * request-ending blank line "\r\n" */
1695 if (p[0] == '\r' && p[1] == '\n') {
1696 /*(common case with no trailers; final \r\n received)*/
1697 hsz += 2;
1698 }
1699 else {
1700 /* trailers or final CRLF crosses into next cq chunk */
1701 hsz -= 2;
1702 do {
1703 c = cq->first;
1704 p = strstr(c->mem->ptr+c->offset+hsz, "\r\n\r\n");
1705 } while (NULL == p
1706 && connection_handle_read_post_cq_compact(cq));
1707 if (NULL == p) {
1708 /*(effectively doubles max request field size
1709 * potentially received by backend, if in the future
1710 * these trailers are added to request headers)*/
1711 if ((off_t)buffer_clen(c->mem) - c->offset
1712 < (off_t)r->conf.max_request_field_size) {
1713 break;
1714 }
1715 else {
1716 /* ignore excessively long trailers;
1717 * disable keep-alive on connection */
1718 r->keep_alive = 0;
1719 p = c->mem->ptr + buffer_clen(c->mem)
1720 - 4;
1721 }
1722 }
1723 hsz = p + 4 - (c->mem->ptr+c->offset);
1724 /* trailers currently ignored, but could be processed
1725 * here if 0 == (r->conf.stream_request_body &
1726 * & (FDEVENT_STREAM_REQUEST
1727 * |FDEVENT_STREAM_REQUEST_BUFMIN))
1728 * taking care to reject fields forbidden in trailers,
1729 * making trailers available to CGI and other backends*/
1730 }
1731 chunkqueue_mark_written(cq, (size_t)hsz);
1732 r->reqbody_length = dst_cq->bytes_in;
1733 break; /* done reading HTTP chunked request body */
1734 }
1735
1736 /* consume HTTP chunked header */
1737 chunkqueue_mark_written(cq, (size_t)hsz);
1738 len = chunkqueue_length(cq);
1739
1740 if (0 !=max_request_size
1741 && (max_request_size < te_chunked
1742 || max_request_size - te_chunked < dst_cq->bytes_in)) {
1743 log_error(r->conf.errh, __FILE__, __LINE__,
1744 "request-size too long: %lld -> 413",
1745 (long long)(dst_cq->bytes_in + te_chunked));
1746 /* 413 Payload Too Large */
1747 return http_response_reqbody_read_error(r, 413);
1748 }
1749
1750 te_chunked += 2; /*(for trailing "\r\n" after chunked data)*/
1751
1752 break; /* read HTTP chunked header */
1753 }
1754
1755 /*(likely better ways to handle chunked header crossing chunkqueue
1756 * chunks, but this situation is not expected to occur frequently)*/
1757 if ((off_t)buffer_clen(c->mem) - c->offset >= 1024) {
1758 log_error(r->conf.errh, __FILE__, __LINE__,
1759 "chunked header line too long -> 400");
1760 /* 400 Bad Request */
1761 return http_response_reqbody_read_error(r, 400);
1762 }
1763 else if (!connection_handle_read_post_cq_compact(cq)) {
1764 break;
1765 }
1766 }
1767 if (0 == te_chunked) break;
1768
1769 if (te_chunked > 2) {
1770 if (len > te_chunked-2) len = te_chunked-2;
1771 if (dst_cq->bytes_in + te_chunked <= 64*1024) {
1772 /* avoid buffering request bodies <= 64k on disk */
1773 chunkqueue_steal(dst_cq, cq, len);
1774 }
1775 else if (0 != chunkqueue_steal_with_tempfiles(dst_cq, cq, len,
1776 r->conf.errh)) {
1777 /* 500 Internal Server Error */
1778 return http_response_reqbody_read_error(r, 500);
1779 }
1780 te_chunked -= len;
1781 len = chunkqueue_length(cq);
1782 }
1783
1784 if (len < te_chunked) break;
1785
1786 if (2 == te_chunked) {
1787 if (-1 == connection_handle_read_post_chunked_crlf(cq)) {
1788 log_error(r->conf.errh, __FILE__, __LINE__,
1789 "chunked data missing end CRLF -> 400");
1790 /* 400 Bad Request */
1791 return http_response_reqbody_read_error(r, 400);
1792 }
1793 chunkqueue_mark_written(cq, 2);/*consume \r\n at end of chunk data*/
1794 te_chunked -= 2;
1795 }
1796
1797 } while (!chunkqueue_is_empty(cq));
1798
1799 r->te_chunked = te_chunked;
1800 return HANDLER_GO_ON;
1801 }
1802
1803
1804 static handler_t
connection_handle_read_body_unknown(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1805 connection_handle_read_body_unknown (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1806 {
1807 /* r->conf.max_request_size is in kBytes */
1808 const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1809 chunkqueue_append_chunkqueue(dst_cq, cq);
1810 if (0 != max_request_size && dst_cq->bytes_in > max_request_size) {
1811 log_error(r->conf.errh, __FILE__, __LINE__,
1812 "request-size too long: %lld -> 413", (long long)dst_cq->bytes_in);
1813 /* 413 Payload Too Large */
1814 return http_response_reqbody_read_error(r, 413);
1815 }
1816 return HANDLER_GO_ON;
1817 }
1818
1819
1820 __attribute_cold__
1821 static int
connection_check_expect_100(request_st * const r,connection * const con)1822 connection_check_expect_100 (request_st * const r, connection * const con)
1823 {
1824 if (con->is_writable <= 0)
1825 return 1;
1826
1827 const buffer * const vb =
1828 http_header_request_get(r, HTTP_HEADER_EXPECT,
1829 CONST_STR_LEN("Expect"));
1830 if (NULL == vb)
1831 return 1;
1832
1833 /* (always unset Expect header so that check is not repeated for request */
1834 int rc = buffer_eq_icase_slen(vb, CONST_STR_LEN("100-continue"));
1835 http_header_request_unset(r, HTTP_HEADER_EXPECT,
1836 CONST_STR_LEN("Expect"));
1837 if (!rc
1838 || 0 != r->reqbody_queue.bytes_in
1839 || !chunkqueue_is_empty(&r->read_queue)
1840 || !chunkqueue_is_empty(&r->write_queue))
1841 return 1;
1842
1843 /* send 100 Continue only if no request body data received yet
1844 * and response has not yet started (checked above) */
1845 if (r->http_version > HTTP_VERSION_1_1)
1846 h2_send_100_continue(r, con);
1847 else if (r->http_version == HTTP_VERSION_1_1)
1848 return connection_write_100_continue(r, con);
1849
1850 return 1;
1851 }
1852
1853
1854 static handler_t
connection_handle_read_post_state(request_st * const r)1855 connection_handle_read_post_state (request_st * const r)
1856 {
1857 connection * const con = r->con;
1858 chunkqueue * const cq = &r->read_queue;
1859 chunkqueue * const dst_cq = &r->reqbody_queue;
1860
1861 int is_closed = 0;
1862
1863 if (r->http_version > HTTP_VERSION_1_1) {
1864 /*(H2_STATE_HALF_CLOSED_REMOTE or H2_STATE_CLOSED)*/
1865 if (r->h2state >= H2_STATE_HALF_CLOSED_REMOTE)
1866 is_closed = 1;
1867 }
1868 else if (con->is_readable > 0) {
1869 con->read_idle_ts = log_monotonic_secs;
1870 const off_t max_per_read =
1871 !(r->conf.stream_request_body /*(if not streaming request body)*/
1872 & (FDEVENT_STREAM_REQUEST|FDEVENT_STREAM_REQUEST_BUFMIN))
1873 ? MAX_READ_LIMIT
1874 : (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_BUFMIN)
1875 ? 16384 /* FDEVENT_STREAM_REQUEST_BUFMIN */
1876 : 65536; /* FDEVENT_STREAM_REQUEST */
1877 switch(con->network_read(con, cq, max_per_read)) {
1878 case -1:
1879 connection_set_state_error(r, CON_STATE_ERROR);
1880 return HANDLER_ERROR;
1881 case -2:
1882 is_closed = 1;
1883 break;
1884 default:
1885 break;
1886 }
1887
1888 chunkqueue_remove_finished_chunks(cq);
1889 }
1890
1891 /* Check for Expect: 100-continue in request headers */
1892 if (light_btst(r->rqst_htags, HTTP_HEADER_EXPECT)
1893 && !connection_check_expect_100(r, con))
1894 return HANDLER_ERROR;
1895
1896 if (r->http_version > HTTP_VERSION_1_1) {
1897 /* h2_recv_data() places frame payload directly into r->reqbody_queue */
1898 }
1899 else if (r->reqbody_length < 0) {
1900 /*(-1: Transfer-Encoding: chunked, -2: unspecified length)*/
1901 handler_t rc = (-1 == r->reqbody_length)
1902 ? connection_handle_read_post_chunked(r, cq, dst_cq)
1903 : connection_handle_read_body_unknown(r, cq, dst_cq);
1904 if (HANDLER_GO_ON != rc) return rc;
1905 chunkqueue_remove_finished_chunks(cq);
1906 }
1907 else {
1908 off_t len = (off_t)r->reqbody_length - dst_cq->bytes_in;
1909 if (r->reqbody_length <= 64*1024) {
1910 /* don't buffer request bodies <= 64k on disk */
1911 chunkqueue_steal(dst_cq, cq, len);
1912 }
1913 else if (chunkqueue_length(dst_cq) + len <= 64*1024
1914 && (!dst_cq->first || dst_cq->first->type == MEM_CHUNK)) {
1915 /* avoid tempfiles when streaming request body to fast backend */
1916 chunkqueue_steal(dst_cq, cq, len);
1917 }
1918 else if (0 !=
1919 chunkqueue_steal_with_tempfiles(dst_cq,cq,len,r->conf.errh)) {
1920 /* writing to temp file failed */ /* Internal Server Error */
1921 return http_response_reqbody_read_error(r, 500);
1922 }
1923 chunkqueue_remove_finished_chunks(cq);
1924 }
1925
1926 if (dst_cq->bytes_in == (off_t)r->reqbody_length) {
1927 /* Content is ready */
1928 r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
1929 if (r->state == CON_STATE_READ_POST) {
1930 connection_set_state(r, CON_STATE_HANDLE_REQUEST);
1931 }
1932 return HANDLER_GO_ON;
1933 }
1934 else if (is_closed) {
1935 #if 0
1936 return http_response_reqbody_read_error(r, 400); /* Bad Request */
1937 #endif
1938 return HANDLER_ERROR;
1939 }
1940 else {
1941 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
1942 return (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
1943 ? HANDLER_GO_ON
1944 : HANDLER_WAIT_FOR_EVENT;
1945 }
1946 }
1947