xref: /lighttpd1.4/src/request.c (revision dc01487e)
1 /*
2  * request - HTTP request processing
3  *
4  * Fully-rewritten from original EXCEPT for request_check_hostname()
5  * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
6  * License: BSD 3-clause (same as lighttpd)
7  */
8 #include "first.h"
9 
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16 
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 static int request_check_hostname(buffer * const host) {
23 	enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL;
24 	size_t i;
25 	int label_len = 0;
26 	size_t host_len, hostport_len;
27 	char *colon;
28 	int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */
29 	int level = 0;
30 
31 	/*
32 	 *       hostport      = host [ ":" port ]
33 	 *       host          = hostname | IPv4address | IPv6address
34 	 *       hostname      = *( domainlabel "." ) toplabel [ "." ]
35 	 *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
36 	 *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
37 	 *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
38 	 *       IPv6address   = "[" ... "]"
39 	 *       port          = *digit
40 	 */
41 
42 	/* IPv6 address */
43 	if (host->ptr[0] == '[') {
44 		char *c = host->ptr + 1;
45 		int colon_cnt = 0;
46 
47 		/* check the address inside [...] */
48 		for (; *c && *c != ']'; c++) {
49 			if (*c == ':') {
50 				if (++colon_cnt > 7) {
51 					return -1;
52 				}
53 			} else if (!light_isxdigit(*c) && '.' != *c) {
54 				return -1;
55 			}
56 		}
57 
58 		/* missing ] */
59 		if (!*c) {
60 			return -1;
61 		}
62 
63 		/* check port */
64 		if (*(c+1) == ':') {
65 			for (c += 2; *c; c++) {
66 				if (!light_isdigit(*c)) {
67 					return -1;
68 				}
69 			}
70 		}
71 		else if ('\0' != *(c+1)) {
72 			/* only a port is allowed to follow [...] */
73 			return -1;
74 		}
75 		return 0;
76 	}
77 
78 	hostport_len = host_len = buffer_string_length(host);
79 
80 	if (NULL != (colon = memchr(host->ptr, ':', host_len))) {
81 		char *c = colon + 1;
82 
83 		/* check portnumber */
84 		for (; *c; c++) {
85 			if (!light_isdigit(*c)) return -1;
86 		}
87 
88 		/* remove the port from the host-len */
89 		host_len = colon - host->ptr;
90 	}
91 
92 	/* Host is empty */
93 	if (host_len == 0) return -1;
94 
95 	/* if the hostname ends in a "." strip it */
96 	if (host->ptr[host_len-1] == '.') {
97 		/* shift port info one left */
98 		if (NULL != colon) memmove(colon-1, colon, hostport_len - host_len);
99 		buffer_string_set_length(host, --hostport_len);
100 		if (--host_len == 0) return -1;
101 	}
102 
103 
104 	/* scan from the right and skip the \0 */
105 	for (i = host_len; i-- > 0; ) {
106 		const char c = host->ptr[i];
107 
108 		switch (stage) {
109 		case TOPLABEL:
110 			if (c == '.') {
111 				/* only switch stage, if this is not the last character */
112 				if (i != host_len - 1) {
113 					if (label_len == 0) {
114 						return -1;
115 					}
116 
117 					/* check the first character at right of the dot */
118 					if (is_ip == 0) {
119 						if (!light_isalnum(host->ptr[i+1])) {
120 							return -1;
121 						}
122 					} else if (!light_isdigit(host->ptr[i+1])) {
123 						is_ip = 0;
124 					} else if ('-' == host->ptr[i+1]) {
125 						return -1;
126 					} else {
127 						/* just digits */
128 						is_ip = 1;
129 					}
130 
131 					stage = DOMAINLABEL;
132 
133 					label_len = 0;
134 					level++;
135 				} else if (i == 0) {
136 					/* just a dot and nothing else is evil */
137 					return -1;
138 				}
139 			} else if (i == 0) {
140 				/* the first character of the hostname */
141 				if (!light_isalnum(c)) {
142 					return -1;
143 				}
144 				label_len++;
145 			} else {
146 				if (c != '-' && !light_isalnum(c)) {
147 					return -1;
148 				}
149 				if (is_ip == -1) {
150 					if (!light_isdigit(c)) is_ip = 0;
151 				}
152 				label_len++;
153 			}
154 
155 			break;
156 		case DOMAINLABEL:
157 			if (is_ip == 1) {
158 				if (c == '.') {
159 					if (label_len == 0) {
160 						return -1;
161 					}
162 
163 					label_len = 0;
164 					level++;
165 				} else if (!light_isdigit(c)) {
166 					return -1;
167 				} else {
168 					label_len++;
169 				}
170 			} else {
171 				if (c == '.') {
172 					if (label_len == 0) {
173 						return -1;
174 					}
175 
176 					/* c is either - or alphanum here */
177 					if ('-' == host->ptr[i+1]) {
178 						return -1;
179 					}
180 
181 					label_len = 0;
182 					level++;
183 				} else if (i == 0) {
184 					if (!light_isalnum(c)) {
185 						return -1;
186 					}
187 					label_len++;
188 				} else {
189 					if (c != '-' && !light_isalnum(c)) {
190 						return -1;
191 					}
192 					label_len++;
193 				}
194 			}
195 
196 			break;
197 		}
198 	}
199 
200 	/* a IP has to consist of 4 parts */
201 	if (is_ip == 1 && level != 3) {
202 		return -1;
203 	}
204 
205 	if (label_len == 0) {
206 		return -1;
207 	}
208 
209 	return 0;
210 }
211 
212 int http_request_host_normalize(buffer * const b, const int scheme_port) {
213     /*
214      * check for and canonicalize numeric IP address and portnum (optional)
215      * (IP address may be followed by ":portnum" (optional))
216      * - IPv6: "[...]"
217      * - IPv4: "x.x.x.x"
218      * - IPv4: 12345678   (32-bit decimal number)
219      * - IPv4: 012345678  (32-bit octal number)
220      * - IPv4: 0x12345678 (32-bit hex number)
221      *
222      * allow any chars (except ':' and '\0' and stray '[' or ']')
223      *   (other code may check chars more strictly or more pedantically)
224      * ':'  delimits (optional) port at end of string
225      * "[]" wraps IPv6 address literal
226      * '\0' should have been rejected earlier were it present
227      *
228      * any chars includes, but is not limited to:
229      * - allow '-' any where, even at beginning of word
230      *     (security caution: might be confused for cmd flag if passed to shell)
231      * - allow all-digit TLDs
232      *     (might be mistaken for IPv4 addr by inet_aton()
233      *      unless non-digits appear in subdomain)
234      */
235 
236     /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
237      * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
238      * Not using inet_pton() (when available) on IPv4 for similar reasons. */
239 
240     const char * const p = b->ptr;
241     const size_t blen = buffer_string_length(b);
242     long port = 0;
243 
244     if (*p != '[') {
245         char * const colon = (char *)memchr(p, ':', blen);
246         if (colon) {
247             if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
248             if (colon[1] != '\0') {
249                 char *e;
250                 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
251                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
252                     /* valid port */
253                 } else {
254                     return -1;
255                 }
256             } /*(else ignore stray colon at string end)*/
257             buffer_string_set_length(b, (size_t)(colon - p)); /*(remove port str)*/
258         }
259 
260         if (light_isdigit(*p)) do {
261             /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
262             /* (check one-element cache of normalized IPv4 address string) */
263             static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
264             size_t n = colon ? (size_t)(colon - p) : blen;
265             sock_addr addr;
266             if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
267             if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
268                 sock_addr_inet_ntop_copy_buffer(b, &addr);
269                 n = buffer_string_length(b);
270                 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
271             }
272         } while (0);
273     } else do { /* IPv6 addr */
274       #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
275 
276         /* (check one-element cache of normalized IPv4 address string) */
277         static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
278         sock_addr addr;
279         char *bracket = b->ptr+blen-1;
280         char *percent = strchr(b->ptr+1, '%');
281         size_t len;
282         int rc;
283         char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
284         if (blen <= 2) return -1; /*(invalid "[]")*/
285         if (*bracket != ']') {
286             bracket = (char *)memchr(b->ptr+1, ']', blen-1);
287             if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
288                return -1;
289             }
290             if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
291                 char *e;
292                 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
293                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
294                     /* valid port */
295                 } else {
296                     return -1;
297                 }
298             }
299         }
300 
301         len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
302         if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
303             /* truncate after ']' and re-add normalized port, if needed */
304             buffer_string_set_length(b, (size_t)(bracket - b->ptr + 1));
305             break;
306         }
307 
308         *bracket = '\0';/*(terminate IPv6 string)*/
309         if (percent) *percent = '\0'; /*(remove %interface from address)*/
310         rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
311         if (percent) *percent = '%'; /*(restore %interface)*/
312         *bracket = ']'; /*(restore bracket)*/
313         if (1 != rc) return -1;
314 
315         sock_addr_inet_ntop(&addr, buf, sizeof(buf));
316         len = strlen(buf);
317         if (percent) {
318             if (percent > bracket) return -1;
319             if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
320             if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
321             memcpy(buf+len, percent, (size_t)(bracket - percent));
322             len += (size_t)(bracket - percent);
323         }
324         buffer_string_set_length(b, 1); /* truncate after '[' */
325         buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));
326 
327       #else
328 
329         return -1;
330 
331       #endif
332     } while (0);
333 
334     if (0 != port && port != scheme_port) {
335         buffer_append_string_len(b, CONST_STR_LEN(":"));
336         buffer_append_int(b, (int)port);
337     }
338 
339     return 0;
340 }
341 
342 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
343     return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
344              && 0 != request_check_hostname(b))
345             || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
346                 && 0 != http_request_host_normalize(b, scheme_port)));
347 }
348 
__attribute_const__
static int request_uri_is_valid_char(const unsigned char c) {
	/* reject 0..32 (controls and space), 127 (DEL) and 255 */
	if (c <= 32) return 0;
	return !(c == 127 || c == 255);
}
353 
354 __attribute_cold__
355 __attribute_noinline__
356 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
357     if (r->conf.log_request_header_on_error) {
358         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
359     }
360     return status;
361 }
362 
363 __attribute_cold__
364 __attribute_noinline__
365 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
366     if (r->conf.log_request_header_on_error) {
367         if ((unsigned char)ch > 32 && ch != 127) {
368             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
369         }
370         else {
371             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
372         }
373     }
374     return 400;
375 }
376 
377 
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    /* Parse up to vlen bytes of v as an unsigned base-10 number.
     * Only '0'..'9' are accepted (no sign, no whitespace); parsing stops at
     * the first other byte or at the digit that would exceed INT64_MAX.
     * errno is not touched.  *err receives the position where parsing
     * stopped, so the whole input was consumed iff *err == v+vlen.
     * (vlen == 0 yields 0 with *err == v; caller decides if that is an error)
     * The return value is only meaningful when *err == v+vlen. */
    const unsigned char *s = (const unsigned char *)v;
    const unsigned char * const end = s + vlen;
    int64_t acc = 0;
    while (s != end) {
        /*(unsigned subtraction: bytes below '0' wrap to a value > 9)*/
        const unsigned int digit = (unsigned int)*s - '0';
        if (digit > 9) break;
        if (acc > INT64_MAX/10) break;           /* acc*10 would overflow */
        acc *= 10;
        if (acc > INT64_MAX - (int64_t)digit) break; /* acc+digit would overflow */
        acc += (int64_t)digit;
        ++s;
    }
    *err = (const char *)s;
    return acc;
}
398 
399 
400 /* add header to list of headers
401  * certain headers are also parsed
402  * might drop a header if deemed unnecessary/broken
403  *
404  * returns 0 on success, HTTP status on error
405  */
406 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
407     buffer **saveb = NULL;
408 
409     /*
410      * Note: k might not be '\0'-terminated
411      * Note: v is not '\0'-terminated
412      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
413      *     (one of '\r' '\n' ' ' '\t')
414      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
415      *     (care must be taken to avoid libc funcs which expect z-strings)
416      */
417     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
418 
419     switch (id) {
420       /*case HTTP_HEADER_OTHER:*/
421       default:
422         break;
423       case HTTP_HEADER_HOST:
424         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
425             saveb = &r->http_host;
426             if (vlen >= 1024) { /*(expecting < 256)*/
427                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
428             }
429         }
430         else if (NULL != r->http_host
431                  && buffer_is_equal_string(r->http_host, v, vlen)) {
432             /* ignore all Host: headers if match authority in request line */
433             return 0; /* ignore header */
434         }
435         else {
436             return http_request_header_line_invalid(r, 400, "duplicate Host header -> 400");
437         }
438         break;
439       case HTTP_HEADER_CONNECTION:
440         /* "Connection: close" is common case if header is present */
441         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
442             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
443             r->keep_alive = 0;
444             break;
445         }
446         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
447             r->keep_alive = 1;
448             break;
449         }
450         break;
451       case HTTP_HEADER_CONTENT_TYPE:
452         if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) {
453             return http_request_header_line_invalid(r, 400, "duplicate Content-Type header -> 400");
454         }
455         break;
456       case HTTP_HEADER_IF_NONE_MATCH:
457         /* if dup, only the first one will survive */
458         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) {
459             return 0; /* ignore header */
460         }
461         break;
462       case HTTP_HEADER_CONTENT_LENGTH:
463         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
464             /*(trailing whitespace was removed from vlen)*/
465             /*(not using strtoll() since v might not be z-string)*/
466             const char *err;
467             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
468             if (err == v+vlen) {
469                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
470                 if (0 == r->reqbody_length) r->reqbody_length = clen;
471             }
472             else {
473                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
474             }
475         }
476         else {
477             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
478         }
479         break;
480       case HTTP_HEADER_HTTP2_SETTINGS:
481         if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) {
482             return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400");
483         }
484         break;
485       case HTTP_HEADER_IF_MODIFIED_SINCE:
486         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) {
487             /* Proxies sometimes send dup headers
488              * if they are the same we ignore the second
489              * if not, we raise an error */
490             const buffer *vb =
491               http_header_request_get(r, HTTP_HEADER_IF_MODIFIED_SINCE,
492                                       CONST_STR_LEN("If-Modified-Since"));
493             if (vb && buffer_eq_icase_slen(vb, v, vlen)) {
494                 /* ignore it if they are the same */
495                 return 0; /* ignore header */
496             }
497             else {
498                 return http_request_header_line_invalid(r, 400, "duplicate If-Modified-Since header -> 400");
499             }
500         }
501         break;
502       case HTTP_HEADER_TRANSFER_ENCODING:
503         if (HTTP_VERSION_1_1 != r->http_version) {
504             return http_request_header_line_invalid(r, 400,
505               HTTP_VERSION_1_0 == r->http_version
506                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
507                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
508         }
509 
510         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
511             /* Transfer-Encoding might contain additional encodings,
512              * which are not currently supported by lighttpd */
513             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
514         }
515         r->reqbody_length = -1;
516 
517         /* Transfer-Encoding is a hop-by-hop header,
518          * which must not be blindly forwarded to backends */
519         return 0; /* skip header */
520     }
521 
522     http_header_request_append(r, id, k, klen, v, vlen);
523 
524     if (saveb) {
525         *saveb = http_header_request_get(r, id, k, klen);
526     }
527 
528     return 0;
529 }
530 
531 __attribute_cold__
532 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
533     const char * proto = memchr(ptr, ' ', len);
534     if (NULL == proto)
535         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
536     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
537     if (NULL == proto)
538         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
539     ++proto;
540 
541     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
542         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
543             /* length already checked before calling this routine */
544             /* (len != (size_t)(proto - ptr + 8)) */
545             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
546                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
547             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
548         }
549         else
550             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
551     }
552     else
553         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
554 
555     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
556     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
557 
558     return 0;
559 }
560 
561 __attribute_cold__
562 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
563     const char *nuri;
564     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
565         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
566        ||
567        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
568         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
569         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
570         const size_t hostlen = nuri - host;
571         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
572             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
573             return NULL;
574         }
575         /* Insert as host header */
576         http_header_request_set(r, HTTP_HEADER_HOST, CONST_STR_LEN("Host"), host, hostlen);
577         r->http_host = http_header_request_get(r, HTTP_HEADER_HOST, CONST_STR_LEN("Host"));
578         return nuri;
579     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
580            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
581            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
582         /* (permitted) */
583         return uri;
584     } else {
585         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
586         return NULL;
587     }
588 }
589 
590 
591 __attribute_cold__
592 __attribute_noinline__
593 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
594 
595 
596 int
597 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
598 {
599     /* :method is required to indicate method
600      * CONNECT method must have :method and :authority
601      * All other methods must have at least :method :scheme :path */
602 
603     if (HTTP_METHOD_UNSET == r->http_method)
604         return http_request_header_line_invalid(r, 400,
605           "missing pseudo-header method -> 400");
606 
607     if (HTTP_METHOD_CONNECT != r->http_method) {
608         if (!scheme)
609             return http_request_header_line_invalid(r, 400,
610               "missing pseudo-header scheme -> 400");
611 
612         if (buffer_string_is_empty(&r->target))
613             return http_request_header_line_invalid(r, 400,
614               "missing pseudo-header path -> 400");
615 
616         const char * const uri = r->target.ptr;
617         if (*uri != '/') { /* (common case: (*uri == '/')) */
618             if (uri[0] != '*' || uri[1] != '\0'
619                 || HTTP_METHOD_OPTIONS != r->http_method)
620                 return http_request_header_line_invalid(r, 400,
621                   "invalid pseudo-header path -> 400");
622         }
623     }
624     else { /* HTTP_METHOD_CONNECT */
625         if (NULL == r->http_host)
626             return http_request_header_line_invalid(r, 400,
627               "missing pseudo-header authority -> 400");
628         if (!buffer_string_is_empty(&r->target) || scheme)
629             return http_request_header_line_invalid(r, 400,
630               "invalid pseudo-header with CONNECT -> 400");
631         /*(reuse uri and ulen to assign to r->target)*/
632         buffer_copy_buffer(&r->target, r->http_host);
633     }
634     buffer_copy_buffer(&r->target_orig, &r->target);
635 
636     /* r->http_host, if set, is checked with http_request_host_policy()
637      * in http_request_parse() */
638 
639     /* copied and modified from end of http_request_parse_reqline() */
640 
641     /* check uri for invalid characters */
642     const unsigned int http_header_strict =
643       (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
644     if (http_header_strict
645         && (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT))
646         return 0; /* URI will be checked in http_request_parse_target() */
647 
648     const uint32_t ulen = buffer_string_length(&r->target);
649     const uint8_t * const uri = (uint8_t *)r->target.ptr;
650     if (http_header_strict) {
651         for (uint32_t i = 0; i < ulen; ++i) {
652             if (!request_uri_is_valid_char(uri[i]))
653                 return http_request_header_char_invalid(r, uri[i],
654                   "invalid character in URI -> 400");
655         }
656     }
657     else {
658         if (NULL != memchr(uri, '\0', ulen))
659             return http_request_header_char_invalid(r, '\0',
660               "invalid character in header -> 400");
661     }
662 
663     return 0;
664 }
665 
666 
667 int
668 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
669 {
670     /* Note: k and v might not be '\0' terminated strings;
671      * care must be taken to avoid libc funcs which expect z-strings */
672     const char * const restrict k = hpctx->k;
673     const char * const restrict v = hpctx->v;
674     const uint32_t klen = hpctx->klen;
675     const uint32_t vlen = hpctx->vlen;
676 
677     if (0 == klen)
678         return http_request_header_line_invalid(r, 400,
679           "invalid header key -> 400");
680     if (0 == vlen)
681         return http_request_header_line_invalid(r, 400,
682           "invalid header value -> 400");
683 
684     if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
685         /*(configurable with server.max-request-field-size; default 8k)*/
686       #if 1 /* emit to error log for people sending large headers */
687         log_error(r->conf.errh, __FILE__, __LINE__,
688                   "oversized request header -> 431");
689         return 431; /* Request Header Fields Too Large */
690       #else
691         /* 431 Request Header Fields Too Large */
692         return http_request_header_line_invalid(r, 431,
693           "oversized request header -> 431");
694       #endif
695     }
696 
697     if (2 == klen && k[0] == 't' && k[1] == 'e'
698         && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
699         return http_request_header_line_invalid(r, 400,
700           "invalid TE header value with HTTP/2 -> 400");
701 
702     if (!hpctx->trailers) {
703         if (*k == ':') {
704             /* HTTP/2 request pseudo-header fields */
705             if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
706                 return http_request_header_line_invalid(r, 400,
707                   "invalid pseudo-header -> 400");
708             switch (klen-1) {
709               case 4:
710                 if (0 == memcmp(k+1, "path", 4)) {
711                     if (!buffer_string_is_empty(&r->target))
712                         return http_request_header_line_invalid(r, 400,
713                           "repeated pseudo-header -> 400");
714                     buffer_copy_string_len(&r->target, v, vlen);
715                     return 0;
716                 }
717                 break;
718               case 6:
719                 if (0 == memcmp(k+1, "method", 6)) {
720                     if (HTTP_METHOD_UNSET != r->http_method)
721                         return http_request_header_line_invalid(r, 400,
722                           "repeated pseudo-header -> 400");
723                     r->http_method = get_http_method_key(v, vlen);
724                     if (HTTP_METHOD_UNSET >= r->http_method)
725                         return http_request_header_line_invalid(r, 501,
726                           "unknown http-method -> 501");
727                     return 0;
728                 }
729                 else if (0 == memcmp(k+1, "scheme", 6)) {
730                     if (hpctx->scheme)
731                         return http_request_header_line_invalid(r, 400,
732                           "repeated pseudo-header -> 400");
733                     switch (vlen) {/*(validated, but then ignored)*/
734                       case 5: /* "https" */
735                         if (v[4]!='s') break;
736                         __attribute_fallthrough__
737                       case 4: /* "http" */
738                         if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
739                             hpctx->scheme = 1;
740                             return 0;
741                         }
742                         break;
743                       default:
744                         break;
745                     }
746                     return http_request_header_line_invalid(r, 400,
747                       "unknown pseudo-header scheme -> 400");
748                 }
749                 break;
750               case 9:
751                 if (0 == memcmp(k+1, "authority", 9)) {
752                     if (r->http_host)
753                         return http_request_header_line_invalid(r, 400,
754                           "repeated pseudo-header -> 400");
755                     if (vlen >= 1024) /*(expecting < 256)*/
756                         return http_request_header_line_invalid(r, 400,
757                           "invalid pseudo-header authority too long -> 400");
758                     /* insert as host header */
759                     http_header_request_set(r, HTTP_HEADER_HOST,
760                                             CONST_STR_LEN("host"), v, vlen);
761                     r->http_host =
762                       http_header_request_get(r, HTTP_HEADER_HOST,
763                                               CONST_STR_LEN("Host"));
764                     return 0;
765                 }
766                 break;
767               default:
768                 break;
769             }
770             return http_request_header_line_invalid(r, 400,
771               "invalid pseudo-header -> 400");
772         }
773         else { /*(non-pseudo headers)*/
774             if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
775                 hpctx->pseudo = 0;
776                 int status =
777                   http_request_validate_pseudohdrs(r, hpctx->scheme,
778                                                    hpctx->http_parseopts);
779                 if (0 != status) return status;
780             }
781 
782             const unsigned int http_header_strict =
783               (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
784 
785             for (uint32_t j = 0; j < klen; ++j) {
786                 if (light_islower(k[j]) || k[j] == '-')
787                     continue; /*(common cases)*/
788                 if (light_isupper(k[j]))
789                     return 400;
790                 if (0 != http_request_parse_header_other(r, k+j, klen-j,
791                                                          http_header_strict))
792                     return 400;
793                 break;
794             }
795 
796             if (http_header_strict) {
797                 for (uint32_t j = 0; j < vlen; ++j) {
798                     if ((((uint8_t *)v)[j] < 32 && v[j] != '\t') || v[j]==127)
799                         return http_request_header_char_invalid(r, v[j],
800                           "invalid character in header -> 400");
801                 }
802             }
803             else {
804                 if (NULL != memchr(v, '\0', vlen))
805                     return http_request_header_char_invalid(r, '\0',
806                       "invalid character in header -> 400");
807             }
808 
809             const enum http_header_e id =
810               hpctx->id ? hpctx->id : http_header_hkey_get_lc(k, klen);
811             return http_request_parse_single_header(r, id, k, klen, v, vlen);
812         }
813     }
814     else { /*(trailers)*/
815         /* ignore trailers (after required HPACK decoding) if streaming
816          * request body to backend since headers have already been sent
817          * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
818          * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
819          * support using HTTP/2 to connect to backends) */
820       #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
821         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
822             return 0;
823       #endif
824         /* Note: do not unconditionally merge into headers since if
825          * headers had already been sent to backend, then mod_accesslog
826          * logging of request headers might be inaccurate.
827          * Many simple backends do not support HTTP/1.1 requests sending
828          * Transfer-Encoding: chunked, and even those that do might not
829          * handle trailers.  Some backends do not even support HTTP/1.1.
830          * For all these reasons, ignore trailers if streaming request
831          * body to backend.  Revisit in future if adding support for
832          * connecting to backends using HTTP/2 (with explicit config
833          * option to force connecting to backends using HTTP/2) */
834 
835         /* XXX: TODO: request trailers not handled if streaming reqbody
836          * XXX: must ensure that trailers are not disallowed field-names
837          */
838 
839         return 0;
840     }
841 }
842 
843 
844 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
845     size_t len = hoff[2];
846 
847     /* parse the first line of the request
848      * <method> <uri> <protocol>\r\n
849      * */
850     if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
851         return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
852     if (ptr[len-2] == '\r')
853         len-=2;
854     else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
855         len-=1;
856     else
857         return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
858 
859     /*
860      * RFC7230:
861      *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
862      *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
863      */
864 
865     /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
866     union proto_un {
867       char c[8];
868       uint64_t u;
869     };
870     static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
871     static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
872     const char *p = ptr + len - 8;
873     union proto_un proto8;
874     proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
875     proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
876     if (p[-1] == ' ' && http_1_1.u == proto8.u) {
877         r->http_version = HTTP_VERSION_1_1;
878         r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
879     }
880     else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
881         r->http_version = HTTP_VERSION_1_0;
882         r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
883     }
884     else {
885         int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
886         if (0 != status) return status;
887         /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
888         for (p = ptr + len - 9; p[-1] != ' '; --p) ;
889     }
890 
891     /* method is expected to be a short string in the general case */
892     size_t i = 0;
893     while (ptr[i] != ' ') ++i;
894   #if 0 /*(space must exist if protocol was parsed successfully)*/
895     while (i < len && ptr[i] != ' ') ++i;
896     if (ptr[i] != ' ')
897         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
898   #endif
899 
900     r->http_method = get_http_method_key(ptr, i);
901     if (HTTP_METHOD_UNSET >= r->http_method)
902         return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");
903 
904     const char *uri = ptr + i + 1;
905 
906     if (uri == p)
907         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
908     len = (size_t)(p - uri - 1);
909 
910     if (*uri != '/') { /* (common case: (*uri == '/')) */
911         uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
912         if (NULL == uri) return 400;
913         len = (size_t)(p - uri - 1);
914     }
915 
916     if (0 == len)
917         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
918 
919     /* check uri for invalid characters */
920     if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) { /* http_header_strict */
921         if ((http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) {
922             /* URI will be checked in http_request_parse_target() */
923         }
924         else {
925             for (i = 0; i < len; ++i) {
926                 if (!request_uri_is_valid_char(uri[i]))
927                     return http_request_header_char_invalid(r, uri[i], "invalid character in URI -> 400");
928             }
929         }
930     }
931     else {
932         /* check entire set of request headers for '\0' */
933         if (NULL != memchr(ptr, '\0', hoff[hoff[0]]))
934             return http_request_header_char_invalid(r, '\0', "invalid character in header -> 400");
935     }
936 
937     buffer_copy_string_len(&r->target, uri, len);
938     buffer_copy_string_len(&r->target_orig, uri, len);
939     return 0;
940 }
941 
942 int http_request_parse_target(request_st * const r, int scheme_port) {
943     /* URI is parsed into components at start of request and may
944      * also be re-parsed upon HANDLER_COMEBACK during the request
945      * r->target is expected to be a "/url-part?query-part"
946      *   (and *not* a fully-qualified URI starting https://...)
947      * r->uri.authority is expected to be parsed elsewhere into r->http_host
948      */
949 
950     /**
951      * prepare strings
952      *
953      * - uri.path
954      * - uri.query
955      *
956      */
957 
958     /**
959      * Name according to RFC 2396
960      *
961      * - scheme
962      * - authority
963      * - path
964      * - query
965      *
966      * (scheme)://(authority)(path)?(query)#fragment
967      *
968      */
969 
970     /* take initial scheme value from connection-level state
971      * (request r->uri.scheme can be overwritten for later,
972      *  for example by mod_extforward or mod_magnet) */
973     buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);
974 
975     buffer * const target = &r->target;
976     if (r->http_method == HTTP_METHOD_CONNECT
977         || (r->http_method == HTTP_METHOD_OPTIONS
978             && target->ptr[0] == '*'
979             && target->ptr[1] == '\0')) {
980         /* CONNECT ... (or) OPTIONS * ... */
981         buffer_copy_buffer(&r->uri.path, target);
982         buffer_clear(&r->uri.query);
983         return 0;
984     }
985 
986     char *qstr;
987     if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
988         /*uint32_t len = (uint32_t)buffer_string_length(target);*/
989         int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
990         if (-2 == qs)
991             return http_request_header_line_invalid(r, 400,
992               "invalid character in URI -> 400"); /* Bad Request */
993         qstr = (-1 == qs) ? NULL : target->ptr+qs;
994       #if 0  /* future: might enable here, or below for all requests */
995         /* (Note: total header size not recalculated on HANDLER_COMEBACK
996          *  even if other request headers changed during processing)
997          * (If (0 != r->loops_per_request), then the generated
998          *  request is too large.  Should a different error be returned?) */
999         r->rqst_header_len -= len;
1000         len = buffer_string_length(target);
1001         r->rqst_header_len += len;
1002         if (len > MAX_HTTP_REQUEST_URI) {
1003             return 414; /* 414 URI Too Long */
1004         }
1005         if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
1006             log_error(r->conf.errh, __FILE__, __LINE__,
1007               "request header fields too large: %u -> 431",
1008               r->rqst_header_len);
1009             return 431; /* Request Header Fields Too Large */
1010         }
1011       #endif
1012     }
1013     else {
1014         size_t rlen = buffer_string_length(target);
1015         qstr = memchr(target->ptr, '#', rlen);/* discard fragment */
1016         if (qstr) {
1017             rlen = (size_t)(qstr - target->ptr);
1018             buffer_string_set_length(target, rlen);
1019         }
1020         qstr = memchr(target->ptr, '?', rlen);
1021     }
1022 
1023     /** extract query string from target */
1024     const char * const pstr = target->ptr;
1025     const uint32_t rlen = buffer_string_length(target);
1026     uint32_t plen;
1027     if (NULL != qstr) {
1028         plen = (uint32_t)(qstr - pstr);
1029         buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1);
1030     }
1031     else {
1032         plen = rlen;
1033         buffer_clear(&r->uri.query);
1034     }
1035     buffer_copy_string_len(&r->uri.path, pstr, plen);
1036 
1037     /* decode url to path
1038      *
1039      * - decode url-encodings  (e.g. %20 -> ' ')
1040      * - remove path-modifiers (e.g. /../)
1041      */
1042 
1043     buffer_urldecode_path(&r->uri.path);
1044     buffer_path_simplify(&r->uri.path, &r->uri.path);
1045     if (r->uri.path.ptr[0] != '/')
1046         return http_request_header_line_invalid(r, 400,
1047           "uri-path does not begin with '/' -> 400"); /* Bad Request */
1048 
1049     return 0;
1050 }
1051 
1052 __attribute_cold__
1053 __attribute_noinline__
1054 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
1055     for (int i = 0; i < klen; ++i) {
1056         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1057         /**
1058          * 1*<any CHAR except CTLs or separators>
1059          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1060          *
1061          */
1062         switch(k[i]) {
1063         case ' ':
1064         case '\t':
1065             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1066         case '(':
1067         case ')':
1068         case '<':
1069         case '>':
1070         case '@':
1071         case ',':
1072         case ';':
1073         case '\\':
1074         case '\"':
1075         case '/':
1076         case '[':
1077         case ']':
1078         case '?':
1079         case '=':
1080         case '{':
1081         case '}':
1082             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1083         default:
1084             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1085                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1086             break; /* ok */
1087         }
1088     }
1089     return 0;
1090 }
1091 
1092 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
1093     const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1094 
1095   #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/
1096     int i = hoff[2];
1097 
1098     if (ptr[i] == ' ' || ptr[i] == '\t') {
1099         return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400");
1100     }
1101   #endif
1102 
1103     for (int i = 2; i < hoff[0]; ++i) {
1104         const char *k = ptr + hoff[i];
1105         /* one past last line hoff[hoff[0]] is to final "\r\n" */
1106         char *end = ptr + hoff[i+1];
1107 
1108         const char *colon = memchr(k, ':', end - k);
1109         if (NULL == colon)
1110             return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");
1111 
1112         const char *v = colon + 1;
1113 
1114         /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1115          * 3.2.4.  Field Parsing
1116          * [...]
1117          * No whitespace is allowed between the header field-name and colon.  In
1118          * the past, differences in the handling of such whitespace have led to
1119          * security vulnerabilities in request routing and response handling.  A
1120          * server MUST reject any received request message that contains
1121          * whitespace between a header field-name and colon with a response code
1122          * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
1123          * response message before forwarding the message downstream.
1124          */
1125         /* (line k[-1] is always preceded by a '\n',
1126          *  including first header after request-line,
1127          *  so no need to check colon != k) */
1128         if (colon[-1] == ' ' || colon[-1] == '\t') {
1129             if (http_header_strict) {
1130                 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
1131             }
1132             else {
1133                 /* remove trailing whitespace from key(if !http_header_strict)*/
1134                 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t');
1135             }
1136         }
1137 
1138         const int klen = (int)(colon - k);
1139         if (0 == klen)
1140             return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
1141         const enum http_header_e id = http_header_hkey_get(k, klen);
1142 
1143         if (id == HTTP_HEADER_OTHER) {
1144             for (int j = 0; j < klen; ++j) {
1145                 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/
1146                 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict))
1147                     return 400;
1148                 break;
1149             }
1150         }
1151 
1152         /* remove leading whitespace from value */
1153         while (*v == ' ' || *v == '\t') ++v;
1154 
1155         for (; i+1 <= hoff[0]; ++i) {
1156             end = ptr + hoff[i+1];
1157             if (end[0] != ' ' && end[0] != '\t') break;
1158 
1159             /* line folding */
1160           #ifdef __COVERITY__
1161             force_assert(end - k >= 2);
1162           #endif
1163             if (end[-2] == '\r')
1164                 end[-2] = ' ';
1165             else if (http_header_strict)
1166                 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1167             end[-1] = ' ';
1168         }
1169       #ifdef __COVERITY__
1170         /*(buf holding k has non-zero request-line, so end[-2] valid)*/
1171         force_assert(end >= k + 2);
1172       #endif
1173         if (end[-2] == '\r')
1174             --end;
1175         else if (http_header_strict)
1176             return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1177         /* remove trailing whitespace from value (+ remove '\r\n') */
1178         /* (line k[-1] is always preceded by a '\n',
1179          *  including first header after request-line,
1180          *  so no need to check (end != k)) */
1181         do { --end; } while (end[-1] == ' ' || end[-1] == '\t');
1182 
1183         const int vlen = (int)(end - v);
1184         /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
1185         if (vlen <= 0) continue; /* ignore header */
1186 
1187         if (http_header_strict) {
1188             for (int j = 0; j < vlen; ++j) {
1189                 if ((((unsigned char *)v)[j] < 32 && v[j] != '\t') || v[j]==127)
1190                     return http_request_header_char_invalid(r, v[j], "invalid character in header -> 400");
1191             }
1192         } /* else URI already checked in http_request_parse_reqline() for any '\0' */
1193 
1194         int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen);
1195         if (0 != status) return status;
1196     }
1197 
1198     return 0;
1199 }
1200 
1201 
1202 static int
1203 http_request_parse (request_st * const restrict r, const int scheme_port)
1204 {
1205     int status = http_request_parse_target(r, scheme_port);
1206     if (0 != status) return status;
1207 
1208     /*(r->http_host might not be set until after parsing request headers)*/
1209     buffer_copy_buffer(&r->uri.authority, r->http_host);/*(copy even if empty)*/
1210     buffer_to_lower(&r->uri.authority);
1211 
1212     /* post-processing */
1213     const unsigned int http_parseopts = r->conf.http_parseopts;
1214 
1215     /* check hostname field if it is set */
1216     if (r->http_host) {
1217         if (0 != http_request_host_policy(r->http_host,
1218                                           http_parseopts, scheme_port))
1219             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1220     }
1221     else {
1222         if (r->http_version >= HTTP_VERSION_1_1)
1223             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1224     }
1225 
1226     if (0 == r->reqbody_length) {
1227         /* POST requires Content-Length (or Transfer-Encoding)
1228          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1229         if (HTTP_METHOD_POST == r->http_method
1230             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1231             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1232         }
1233     }
1234     else {
1235         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1236         if (-1 == r->reqbody_length
1237             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1238             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1239              * 3.3.3.  Message Body Length
1240              * [...]
1241              * If a message is received with both a Transfer-Encoding and a
1242              * Content-Length header field, the Transfer-Encoding overrides the
1243              * Content-Length.  Such a message might indicate an attempt to
1244              * perform request smuggling (Section 9.5) or response splitting
1245              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1246              * remove the received Content-Length field prior to forwarding such
1247              * a message downstream.
1248              */
1249             const unsigned int http_header_strict =
1250               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1251             if (http_header_strict) {
1252                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1253             }
1254             else {
1255                 /* ignore Content-Length */
1256                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1257             }
1258         }
1259         if (http_method_get_or_head(r->http_method)
1260             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1261             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1262         }
1263     }
1264 
1265     return 0;
1266 }
1267 
1268 
1269 static int
1270 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1271 {
1272     /*
1273      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1274      * Header : "^([-a-zA-Z]+): (.+)$"
1275      * End    : "^$"
1276      */
1277 
1278     int status;
1279     const unsigned int http_parseopts = r->conf.http_parseopts;
1280 
1281     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1282     if (0 != status) return status;
1283 
1284     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1285     if (0 != status) return status;
1286 
1287     return http_request_parse(r, scheme_port);
1288 }
1289 
1290 
1291 static void
1292 http_request_headers_fin (request_st * const restrict r)
1293 {
1294     if (0 == r->http_status) {
1295       #if 0
1296         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1297                                 | (1 << COMP_HTTP_SCHEME)
1298                                 | (1 << COMP_HTTP_HOST)
1299                                 | (1 << COMP_HTTP_REMOTE_IP)
1300                                 | (1 << COMP_HTTP_REQUEST_METHOD)
1301                                 | (1 << COMP_HTTP_URL)
1302                                 | (1 << COMP_HTTP_QUERY_STRING)
1303                                 | (1 << COMP_HTTP_REQUEST_HEADER);
1304       #else
1305         /* all config conditions are valid after parsing header
1306          * (set all bits; remove dependency on plugin_config.h) */
1307         r->conditional_is_valid = ~0u;
1308       #endif
1309     }
1310     else {
1311         r->keep_alive = 0;
1312         r->reqbody_length = 0;
1313     }
1314 }
1315 
1316 
1317 void
1318 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1319 {
1320     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1321 
1322     http_request_headers_fin(r);
1323 
1324     if (0 != r->http_status) {
1325         if (r->conf.log_request_header_on_error) {
1326             /*(http_request_parse_headers() modifies hdrs only to
1327              * undo line-wrapping in-place using spaces)*/
1328             log_error(r->conf.errh, __FILE__, __LINE__,
1329               "request-header:\n%.*s", (int)r->rqst_header_len, hdrs);
1330         }
1331     }
1332 }
1333 
1334 
1335 void
1336 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1337 {
1338     if (0 == r->http_status)
1339         r->http_status = http_request_parse(r, scheme_port);
1340 
1341     if (0 == r->http_status) {
1342         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1343             r->http_status = http_request_header_line_invalid(r, 400,
1344               "invalid Connection header with HTTP/2 -> 400");
1345     }
1346 
1347     http_request_headers_fin(r);
1348 
1349     /* limited; headers not collected into a single buf for HTTP/2 */
1350     if (__builtin_expect( (0 != r->http_status), 0)) {
1351         if (r->conf.log_request_header_on_error) {
1352             log_error(r->conf.errh, __FILE__, __LINE__,
1353               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1354               r->http_host ? r->http_host->ptr : "",
1355               (HTTP_METHOD_UNSET != r->http_method)
1356                 ? get_http_method_name(r->http_method)
1357                 : "",
1358               !buffer_string_is_empty(&r->target) ? r->target.ptr : "");
1359         }
1360     }
1361 
1362     /* ignore Upgrade if using HTTP/2 */
1363     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1364         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1365                                   CONST_STR_LEN("upgrade"));
1366     /* XXX: should filter out other hop-by-hop connection headers, too */
1367 }
1368