xref: /lighttpd1.4/src/request.c (revision e78cd765)
1 /*
2  * request - HTTP request processing
3  *
4  * Fully-rewritten from original
5  * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
6  * License: BSD 3-clause (same as lighttpd)
7  */
8 #include "first.h"
9 
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16 
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
/* Strict scan of a request target: locate the first byte that is not allowed.
 * Rejected bytes: 0..32 (control chars and space), 127 (DEL) and 255.
 * s need not be '\0'-terminated; exactly len bytes are examined.
 * Returns pointer to the first rejected byte, or NULL if every byte passes. */
__attribute_noinline__
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) {
    const uint8_t * const end = s + len;
    for (const uint8_t *p = s; p != end; ++p) {
        const uint8_t c = *p;
        if (__builtin_expect( (c <= 32 || c == 127 || c == 255), 0))
            return (const char *)p;
    }
    return NULL;
}
33 
/* Strict scan of a header value: locate the first control character.
 * Rejected bytes: 0..31 except '\t', and 127 (DEL).  Bytes >= 128 pass.
 * s need not be '\0'-terminated; exactly len bytes are examined.
 * Returns pointer to the first rejected byte, or NULL if every byte passes. */
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) {
    const uint8_t * const bytes = (const uint8_t *)s;
    uint_fast32_t pos = 0;
    while (pos < len) {
        const uint8_t c = bytes[pos];
        if (__builtin_expect( ((c < 32 && c != '\t') || c == 127), 0))
            return s + pos;
        ++pos;
    }
    return NULL;
}
45 
/* Minimal scan of a line: locate the first '\0' or '\n'.
 * s need not be '\0'-terminated; exactly len bytes are examined.
 * Returns pointer to the first such byte, or NULL if neither is present. */
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) {
    const char * const end = s + len;
    for (const char *p = s; p != end; ++p) {
        if (__builtin_expect( (*p == '\0' || *p == '\n'), 0))
            return p;
    }
    return NULL;
}
55 
56 static int request_check_hostname(buffer * const host) {
57     /*
58      *       hostport      = host [ ":" port ]
59      *       host          = hostname | IPv4address | IPv6address
60      *       hostname      = *( domainlabel "." ) toplabel [ "." ]
61      *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
62      *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
63      *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
64      *       IPv6address   = "[" ... "]"
65      *       port          = *digit
66      */
67 
68     const char *h = host->ptr;
69 
70     if (*h != '[') {
71         uint32_t len = buffer_clen(host);
72         const char * const colon = memchr(h, ':', len);
73         uint32_t hlen = colon ? (uint32_t)(colon - h) : len;
74 
75         /* if hostname ends in ".", strip it */
76         if (__builtin_expect( (0 == hlen), 0)) return -1;
77         if (__builtin_expect( (h[hlen-1] == '.'), 0)) {
78             /* shift port info one left */
79             if (--hlen == 0) return -1;
80             --len;
81             if (NULL != colon)
82                 memmove(host->ptr+hlen, colon, len - hlen);
83             buffer_truncate(host, len);
84         }
85 
86         int label_len = 0;
87         int allnumeric = 1;
88         int numeric = 1;
89         int level = 0;
90         for (uint32_t i = 0; i < hlen; ++i) {
91             const int ch = h[i];
92             ++label_len;
93             if (light_isdigit(ch))
94                 continue;
95             else if ((light_isalpha(ch) || (ch == '-' && i != 0)))
96                 numeric = 0;
97             else if (ch == '.' && 1 != label_len && '-' != h[i+1]) {
98                 allnumeric &= numeric;
99                 numeric = 1;
100                 label_len = 0;
101                 ++level;
102             }
103             else
104                 return -1;
105         }
106         /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
107         if (0 == label_len || (numeric && (level != 3 || !allnumeric)))
108             return -1;
109 
110         h += hlen;
111     }
112     else {  /* IPv6 address */
113         /* check the address inside [...]; note: not fully validating */
114         /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
115         ++h; /* step past '[' */
116         int cnt = 0;
117         while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h;
118         /*(invalid char, too many ':', missing ']', or empty "[]")*/
119         if (*h != ']' || h - host->ptr == 1) return -1;
120         ++h; /* step past ']' */
121     }
122 
123     /* check numerical port, if present */
124     if (*h == ':') {
125         if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/
126             buffer_truncate(host, h - host->ptr);
127         do { ++h; } while (light_isdigit(*h));
128     }
129 
130     return (*h == '\0') ? 0 : -1;
131 }
132 
133 int http_request_host_normalize(buffer * const b, const int scheme_port) {
134     /*
135      * check for and canonicalize numeric IP address and portnum (optional)
136      * (IP address may be followed by ":portnum" (optional))
137      * - IPv6: "[...]"
138      * - IPv4: "x.x.x.x"
139      * - IPv4: 12345678   (32-bit decimal number)
140      * - IPv4: 012345678  (32-bit octal number)
141      * - IPv4: 0x12345678 (32-bit hex number)
142      *
143      * allow any chars (except ':' and '\0' and stray '[' or ']')
144      *   (other code may check chars more strictly or more pedantically)
145      * ':'  delimits (optional) port at end of string
146      * "[]" wraps IPv6 address literal
147      * '\0' should have been rejected earlier were it present
148      *
149      * any chars includes, but is not limited to:
150      * - allow '-' any where, even at beginning of word
151      *     (security caution: might be confused for cmd flag if passed to shell)
152      * - allow all-digit TLDs
153      *     (might be mistaken for IPv4 addr by inet_aton()
154      *      unless non-digits appear in subdomain)
155      */
156 
157     /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
158      * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
159      * Not using inet_pton() (when available) on IPv4 for similar reasons. */
160 
161     const char * const p = b->ptr;
162     const size_t blen = buffer_clen(b);
163     long port = 0;
164 
165     if (*p != '[') {
166         char * const colon = (char *)memchr(p, ':', blen);
167         if (colon) {
168             if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
169             if (colon[1] != '\0') {
170                 char *e;
171                 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
172                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
173                     /* valid port */
174                 } else {
175                     return -1;
176                 }
177             } /*(else ignore stray colon at string end)*/
178             buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
179         }
180 
181         if (light_isdigit(*p)) do {
182             /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
183             /* (check one-element cache of normalized IPv4 address string) */
184             static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
185             size_t n = colon ? (size_t)(colon - p) : blen;
186             sock_addr addr;
187             if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
188             if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
189                 sock_addr_inet_ntop_copy_buffer(b, &addr);
190                 n = buffer_clen(b);
191                 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
192             }
193         } while (0);
194     } else do { /* IPv6 addr */
195       #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
196 
197         /* (check one-element cache of normalized IPv4 address string) */
198         static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
199         sock_addr addr;
200         char *bracket = b->ptr+blen-1;
201         char *percent = strchr(b->ptr+1, '%');
202         size_t len;
203         int rc;
204         char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
205         if (blen <= 2) return -1; /*(invalid "[]")*/
206         if (*bracket != ']') {
207             bracket = (char *)memchr(b->ptr+1, ']', blen-1);
208             if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
209                return -1;
210             }
211             if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
212                 char *e;
213                 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
214                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
215                     /* valid port */
216                 } else {
217                     return -1;
218                 }
219             }
220         }
221 
222         len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
223         if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
224             /* truncate after ']' and re-add normalized port, if needed */
225             buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
226             break;
227         }
228 
229         *bracket = '\0';/*(terminate IPv6 string)*/
230         if (percent) *percent = '\0'; /*(remove %interface from address)*/
231         rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
232         if (percent) *percent = '%'; /*(restore %interface)*/
233         *bracket = ']'; /*(restore bracket)*/
234         if (1 != rc) return -1;
235 
236         sock_addr_inet_ntop(&addr, buf, sizeof(buf));
237         len = strlen(buf);
238         if (percent) {
239             if (percent > bracket) return -1;
240             if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
241             if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
242             memcpy(buf+len, percent, (size_t)(bracket - percent));
243             len += (size_t)(bracket - percent);
244         }
245         buffer_truncate(b, 1); /* truncate after '[' */
246         buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));
247 
248       #else
249 
250         return -1;
251 
252       #endif
253     } while (0);
254 
255     if (0 != port && port != scheme_port) {
256         buffer_append_string_len(b, CONST_STR_LEN(":"));
257         buffer_append_int(b, (int)port);
258     }
259 
260     return 0;
261 }
262 
263 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
264     /* caller should lowercase, as is done in http_request_header_set_Host(),
265      * for consistency in case the value is used prior to calling policy func */
266     /*buffer_to_lower(b);*/
267     return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
268                ? 0 != request_check_hostname(b)
269                : NULL != http_request_check_line_minimal(BUF_PTR_LEN(b)))
270             || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
271                 && 0 != http_request_host_normalize(b, scheme_port)));
272 }
273 
274 __attribute_cold__
275 __attribute_noinline__
276 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
277     if (r->conf.log_request_header_on_error) {
278         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
279     }
280     return status;
281 }
282 
283 __attribute_cold__
284 __attribute_noinline__
285 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
286     if (r->conf.log_request_header_on_error) {
287         if ((unsigned char)ch > 32 && ch != 127) {
288             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
289         }
290         else {
291             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
292         }
293     }
294     return 400;
295 }
296 
297 
298 __attribute_noinline__
299 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
300 {
301     r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
302                                                CONST_STR_LEN("Host"));
303     buffer_copy_string_len_lc(r->http_host, h, hlen);
304 }
305 
306 
/* Restricted base 10 strtoll(): parse at most vlen chars of v, digits only.
 *
 * - no leading whitespace, no sign, no base prefix: parsing stops at the
 *   first char which is not '0'..'9'
 * - parsing also stops before a digit which would make the value exceed
 *   INT64_MAX
 * - errno is not set; *err is set to the position where parsing stopped,
 *   so the caller detects an error if *err != v+vlen upon return
 * - caller must check 0 == vlen if that is to be an error for caller
 *
 * v need not be '\0'-terminated.
 * Returns the accumulated value; meaningful only if *err == v+vlen. */
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    const unsigned char * const digits = (const unsigned char *)v;
    int64_t acc = 0;
    uint32_t pos = 0;
    while (pos < vlen) {
        /*(unsigned arithmetic; chars below '0' wrap to large values)*/
        const unsigned int d = (unsigned int)digits[pos] - (unsigned int)'0';
        if (d > 9) break;                        /* not a digit */
        if (acc > INT64_MAX/10) break;           /* acc * 10 would overflow */
        acc *= 10;
        if (acc > INT64_MAX - (int64_t)d) break; /* acc + d would overflow */
        acc += (int64_t)d;
        ++pos;
    }
    *err = v + pos;
    return acc;
}
327 
328 
329 __attribute_cold__
330 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
331     /* Proxies sometimes send dup headers
332      * if they are the same we ignore the second
333      * if not, we raise an error */
334     const buffer * const vb = http_header_request_get(r, id, k, klen);
335     if (vb && buffer_eq_icase_slen(vb, v, vlen))
336         return 0; /* ignore header; matches existing header */
337 
338     const char *errmsg;
339     switch (id) {
340       case HTTP_HEADER_HOST:
341         errmsg = "duplicate Host header -> 400";
342         break;
343       case HTTP_HEADER_CONTENT_TYPE:
344         errmsg = "duplicate Content-Type header -> 400";
345         break;
346       case HTTP_HEADER_IF_MODIFIED_SINCE:
347         errmsg = "duplicate If-Modified-Since header -> 400";
348         break;
349       case HTTP_HEADER_HTTP2_SETTINGS:
350         errmsg = "duplicate HTTP2-Settings header -> 400";
351         break;
352       default:
353         errmsg = "duplicate header -> 400";
354         break;
355       case HTTP_HEADER_IF_NONE_MATCH:
356         /* if dup, only the first one will survive */
357         return 0; /* ignore header */
358     }
359     return http_request_header_line_invalid(r, 400, errmsg);
360 }
361 
362 
363 /* add header to list of headers
364  * certain headers are also parsed
365  * might drop a header if deemed unnecessary/broken
366  *
367  * returns 0 on success, HTTP status on error
368  */
369 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
370     /*
371      * Note: k might not be '\0'-terminated
372      * Note: v is not '\0'-terminated
373      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
374      *     (one of '\r' '\n' ' ' '\t')
375      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
376      *     (care must be taken to avoid libc funcs which expect z-strings)
377      */
378     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
379 
380     switch (id) {
381       /*case HTTP_HEADER_OTHER:*/
382       default:
383         break;
384       case HTTP_HEADER_HOST:
385         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
386             if (vlen >= 1024) { /*(expecting < 256)*/
387                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
388             }
389             /*(http_request_header_append() plus sets r->http_host)*/
390             http_request_header_set_Host(r, v, vlen);
391             return 0;
392         }
393         else if (NULL != r->http_host
394                  && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
395             /* ignore all Host: headers if match authority in request line */
396             /* (expect Host to match case in :authority of HTTP/2 request) */
397             return 0; /* ignore header */
398         }
399         /* else parse duplicate for match or error */
400         __attribute_fallthrough__
401       case HTTP_HEADER_IF_MODIFIED_SINCE:
402       case HTTP_HEADER_IF_NONE_MATCH:
403       case HTTP_HEADER_CONTENT_TYPE:
404       case HTTP_HEADER_HTTP2_SETTINGS:
405         if (light_btst(r->rqst_htags, id))
406             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
407         break;
408       case HTTP_HEADER_CONNECTION:
409         /* "Connection: close" is common case if header is present */
410         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
411             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
412             r->keep_alive = 0;
413             break;
414         }
415         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
416             r->keep_alive = 1;
417             break;
418         }
419         break;
420       case HTTP_HEADER_CONTENT_LENGTH:
421         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
422             /*(trailing whitespace was removed from vlen)*/
423             /*(not using strtoll() since v might not be z-string)*/
424             const char *err;
425             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
426             if (err == v+vlen) {
427                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
428                 if (0 == r->reqbody_length) r->reqbody_length = clen;
429             }
430             else {
431                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
432             }
433         }
434         else {
435             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
436         }
437         break;
438       case HTTP_HEADER_TRANSFER_ENCODING:
439         if (HTTP_VERSION_1_1 != r->http_version) {
440             return http_request_header_line_invalid(r, 400,
441               HTTP_VERSION_1_0 == r->http_version
442                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
443                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
444         }
445 
446         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
447             /* Transfer-Encoding might contain additional encodings,
448              * which are not currently supported by lighttpd */
449             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
450         }
451         r->reqbody_length = -1;
452 
453         /* Transfer-Encoding is a hop-by-hop header,
454          * which must not be blindly forwarded to backends */
455         return 0; /* skip header */
456     }
457 
458     http_header_request_append(r, id, k, klen, v, vlen);
459     return 0;
460 }
461 
462 __attribute_cold__
463 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
464     const char * proto = memchr(ptr, ' ', len);
465     if (NULL == proto)
466         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
467     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
468     if (NULL == proto)
469         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
470     ++proto;
471 
472     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
473         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
474             /* length already checked before calling this routine */
475             /* (len != (size_t)(proto - ptr + 8)) */
476             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
477                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
478             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
479         }
480         else
481             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
482     }
483     else
484         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
485 
486     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
487     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
488 
489     return 0;
490 }
491 
492 __attribute_cold__
493 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
494     const char *nuri;
495     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
496         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
497        ||
498        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
499         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
500         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
501         const size_t hostlen = nuri - host;
502         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
503             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
504             return NULL;
505         }
506         /* Insert as "Host" header */
507         http_request_header_set_Host(r, host, hostlen);
508         return nuri;
509     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
510            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
511            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
512         /* (permitted) */
513         return uri;
514     } else {
515         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
516         return NULL;
517     }
518 }
519 
520 
521 __attribute_cold__
522 __attribute_noinline__
523 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
524 
525 
526 int
527 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
528 {
529     /* :method is required to indicate method
530      * CONNECT method must have :method and :authority
531      * All other methods must have at least :method :scheme :path */
532 
533     if (HTTP_METHOD_UNSET == r->http_method)
534         return http_request_header_line_invalid(r, 400,
535           "missing pseudo-header method -> 400");
536 
537     if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) {
538         if (!scheme)
539             return http_request_header_line_invalid(r, 400,
540               "missing pseudo-header scheme -> 400");
541 
542         if (buffer_is_blank(&r->target))
543             return http_request_header_line_invalid(r, 400,
544               "missing pseudo-header path -> 400");
545 
546         const char * const uri = r->target.ptr;
547         if (*uri != '/') { /* (common case: (*uri == '/')) */
548             if (uri[0] != '*' || uri[1] != '\0'
549                 || HTTP_METHOD_OPTIONS != r->http_method)
550                 return http_request_header_line_invalid(r, 400,
551                   "invalid pseudo-header path -> 400");
552         }
553     }
554     else { /* HTTP_METHOD_CONNECT */
555         if (NULL == r->http_host)
556             return http_request_header_line_invalid(r, 400,
557               "missing pseudo-header authority -> 400");
558         if (!buffer_is_blank(&r->target) || scheme)
559             return http_request_header_line_invalid(r, 400,
560               "invalid pseudo-header with CONNECT -> 400");
561         /* note: this copy occurs prior to http_request_host_policy()
562          * so any consumer handling CONNECT should normalize r->target
563          * as appropriate */
564         buffer_copy_buffer(&r->target, r->http_host);
565     }
566     buffer_copy_buffer(&r->target_orig, &r->target);
567 
568     /* r->http_host, if set, is checked with http_request_host_policy()
569      * in http_request_parse() */
570 
571     /* copied and modified from end of http_request_parse_reqline() */
572 
573     /* check uri for invalid characters */
574     const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/
575     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
576       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
577           ? NULL /* URI will be checked in http_request_parse_target() */
578           : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len)
579       : http_request_check_line_minimal(r->target.ptr, len);
580     return (NULL == x)
581       ? 0
582       : http_request_header_char_invalid(r, *x,
583           "invalid character in URI -> 400");
584 }
585 
586 
587 int
588 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
589 {
590     /* Note: k and v might not be '\0' terminated strings;
591      * care must be taken to avoid libc funcs which expect z-strings */
592     const char * const restrict k = hpctx->k;
593     const char * const restrict v = hpctx->v;
594     const uint32_t klen = hpctx->klen;
595     const uint32_t vlen = hpctx->vlen;
596 
597     if (0 == klen)
598         return http_request_header_line_invalid(r, 400,
599           "invalid header key -> 400");
600 
601     if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
602         /*(configurable with server.max-request-field-size; default 8k)*/
603       #if 1 /* emit to error log for people sending large headers */
604         log_error(r->conf.errh, __FILE__, __LINE__,
605                   "oversized request header -> 431");
606         return 431; /* Request Header Fields Too Large */
607       #else
608         /* 431 Request Header Fields Too Large */
609         return http_request_header_line_invalid(r, 431,
610           "oversized request header -> 431");
611       #endif
612     }
613 
614     if (!hpctx->trailers) {
615         if (*k == ':') {
616             /* HTTP/2 request pseudo-header fields */
617             if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
618                 return http_request_header_line_invalid(r, 400,
619                   "invalid pseudo-header -> 400");
620             if (0 == vlen)
621                 return http_request_header_line_invalid(r, 400,
622                   "invalid header value -> 400");
623 
624             /* (note: relies on implementation details using ls-hpack in h2.c)
625              * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
626              *  matches key, not also value, if lsxpack_header_t flags does not
627              *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
628              *  below indicates any method, not only "GET") */
629             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
630                 switch (klen-1) {
631                   case 4:
632                     if (0 == memcmp(k+1, "path", 4))
633                         hpctx->id = HTTP_HEADER_H2_PATH;
634                     break;
635                   case 6:
636                     if (0 == memcmp(k+1, "method", 6))
637                         hpctx->id = HTTP_HEADER_H2_METHOD_GET;
638                     else if (0 == memcmp(k+1, "scheme", 6))
639                         hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
640                     break;
641                   case 9:
642                     if (0 == memcmp(k+1, "authority", 9))
643                         hpctx->id = HTTP_HEADER_H2_AUTHORITY;
644                     break;
645                   default:
646                     break;
647                 }
648                 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
649                     return http_request_header_line_invalid(r, 400,
650                       "invalid pseudo-header -> 400");
651             }
652 
653             switch (hpctx->id) {
654               case HTTP_HEADER_H2_AUTHORITY:
655                 if (__builtin_expect( (r->http_host != NULL), 0))
656                     break;
657                 if (vlen >= 1024) /*(expecting < 256)*/
658                     return http_request_header_line_invalid(r, 400,
659                       "invalid pseudo-header authority too long -> 400");
660                 /* insert as "Host" header */
661                 http_request_header_set_Host(r, v, vlen);
662                 return 0;
663               case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
664               case HTTP_HEADER_H2_METHOD_POST:
665                 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
666                     break;
667                 r->http_method = get_http_method_key(v, vlen);
668                 if (HTTP_METHOD_UNSET >= r->http_method)
669                     return http_request_header_line_invalid(r, 501,
670                       "unknown http-method -> 501");
671                 return 0;
672               case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
673               case HTTP_HEADER_H2_PATH_INDEX_HTML:
674                 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
675                     break;
676                 buffer_copy_string_len(&r->target, v, vlen);
677                 return 0;
678               case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
679               case HTTP_HEADER_H2_SCHEME_HTTPS:
680                 if (__builtin_expect( (hpctx->scheme), 0))
681                     break;
682                 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
683                 return 0;
684                #if 0
685                 switch (vlen) {/*(validated, but then ignored)*/
686                   case 5: /* "https" */
687                     if (v[4]!='s') break;
688                     __attribute_fallthrough__
689                   case 4: /* "http" */
690                     if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
691                         hpctx->scheme = 1;
692                         return 0;
693                     }
694                     break;
695                   default:
696                     break;
697                 }
698                 return http_request_header_line_invalid(r, 400,
699                   "unknown pseudo-header scheme -> 400");
700                #endif
701               default:
702                 return http_request_header_line_invalid(r, 400,
703                   "invalid pseudo-header -> 400");
704             }
705             return http_request_header_line_invalid(r, 400,
706               "repeated pseudo-header -> 400");
707         }
708         else { /*(non-pseudo headers)*/
709             if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
710                 hpctx->pseudo = 0;
711                 int status =
712                   http_request_validate_pseudohdrs(r, hpctx->scheme,
713                                                    hpctx->http_parseopts);
714                 if (0 != status) return status;
715             }
716             if (0 == vlen)
717                 return 0;
718 
719             const unsigned int http_header_strict =
720               (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
721 
722             const char * const x = (http_header_strict)
723               ? http_request_check_line_strict(v, vlen)
724               : http_request_check_line_minimal(v, vlen);
725             if (x)
726                 return http_request_header_char_invalid(r, *x,
727                   "invalid character in header -> 400");
728 
729             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
730                 uint32_t j = 0;
731                 while (j < klen && (light_islower(k[j]) || k[j] == '-'))
732                     ++j;
733 
734                 if (__builtin_expect( (j != klen), 0)) {
735                     if (light_isupper(k[j]))
736                         return 400;
737                     if (0 != http_request_parse_header_other(r, k+j, klen-j,
738                                                             http_header_strict))
739                         return 400;
740                 }
741 
742                 hpctx->id = http_header_hkey_get_lc(k, klen);
743             }
744 
745             const enum http_header_e id = (enum http_header_e)hpctx->id;
746 
747             if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
748                 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
749                 return http_request_header_line_invalid(r, 400,
750                   "invalid TE header value with HTTP/2 -> 400");
751 
752             return http_request_parse_single_header(r, id, k, klen, v, vlen);
753         }
754     }
755     else { /*(trailers)*/
756         if (*k == ':')
757             return http_request_header_line_invalid(r, 400,
758               "invalid pseudo-header in trailers -> 400");
759         /* ignore trailers (after required HPACK decoding) if streaming
760          * request body to backend since headers have already been sent
761          * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
762          * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
763          * support using HTTP/2 to connect to backends) */
764       #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
765         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
766             return 0;
767       #endif
768         /* Note: do not unconditionally merge into headers since if
769          * headers had already been sent to backend, then mod_accesslog
770          * logging of request headers might be inaccurate.
771          * Many simple backends do not support HTTP/1.1 requests sending
772          * Transfer-Encoding: chunked, and even those that do might not
773          * handle trailers.  Some backends do not even support HTTP/1.1.
774          * For all these reasons, ignore trailers if streaming request
775          * body to backend.  Revisit in future if adding support for
776          * connecting to backends using HTTP/2 (with explicit config
777          * option to force connecting to backends using HTTP/2) */
778 
779         /* XXX: TODO: request trailers not handled if streaming reqbody
780          * XXX: must ensure that trailers are not disallowed field-names
781          */
782 
783       #if 0
784         if (0 == vlen)
785             return 0;
786       #endif
787 
788         return 0;
789     }
790 }
791 
792 
793 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
794     size_t len = hoff[2];
795 
796     /* parse the first line of the request
797      * <method> <uri> <protocol>\r\n
798      * */
799     if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
800         return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
801     if (ptr[len-2] == '\r')
802         len-=2;
803     else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
804         len-=1;
805     else
806         return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
807 
808     /*
809      * RFC7230:
810      *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
811      *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
812      */
813 
814     /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
815     union proto_un {
816       char c[8];
817       uint64_t u;
818     };
819     static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
820     static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
821     const char *p = ptr + len - 8;
822     union proto_un proto8;
823     proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
824     proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
825     if (p[-1] == ' ' && http_1_1.u == proto8.u) {
826         r->http_version = HTTP_VERSION_1_1;
827         r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
828     }
829     else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
830         r->http_version = HTTP_VERSION_1_0;
831         r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
832     }
833     else {
834         int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
835         if (0 != status) return status;
836         /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
837         for (p = ptr + len - 9; p[-1] != ' '; --p) ;
838     }
839 
840     /* method is expected to be a short string in the general case */
841     size_t i = 0;
842     while (ptr[i] != ' ') ++i;
843   #if 0 /*(space must exist if protocol was parsed successfully)*/
844     while (i < len && ptr[i] != ' ') ++i;
845     if (ptr[i] != ' ')
846         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
847   #endif
848 
849     r->http_method = get_http_method_key(ptr, i);
850     if (HTTP_METHOD_UNSET >= r->http_method)
851         return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");
852 
853     const char *uri = ptr + i + 1;
854 
855     if (uri == p)
856         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
857     len = (size_t)(p - uri - 1);
858 
859     if (*uri != '/') { /* (common case: (*uri == '/')) */
860         uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
861         if (NULL == uri) return 400;
862         len = (size_t)(p - uri - 1);
863     }
864 
865     if (0 == len)
866         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
867 
868     /* check uri for invalid characters */     /* http_header_strict */
869     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
870       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
871           ? NULL /* URI will be checked in http_request_parse_target() */
872           : http_request_check_uri_strict((const uint8_t *)uri, len)
873       : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */
874     if (x)
875         http_request_header_char_invalid(r, *x, "invalid character in URI -> 400");
876 
877     buffer_copy_string_len(&r->target, uri, len);
878     buffer_copy_string_len(&r->target_orig, uri, len);
879     return 0;
880 }
881 
882 int http_request_parse_target(request_st * const r, int scheme_port) {
883     /* URI is parsed into components at start of request and may
884      * also be re-parsed upon HANDLER_COMEBACK during the request
885      * r->target is expected to be a "/url-part?query-part"
886      *   (and *not* a fully-qualified URI starting https://...)
887      * r->uri.authority is expected to be parsed elsewhere into r->http_host
888      */
889 
890     /**
891      * prepare strings
892      *
893      * - uri.path
894      * - uri.query
895      *
896      */
897 
898     /**
899      * Name according to RFC 2396
900      *
901      * - scheme
902      * - authority
903      * - path
904      * - query
905      *
906      * (scheme)://(authority)(path)?(query)#fragment
907      *
908      */
909 
910     /* take initial scheme value from connection-level state
911      * (request r->uri.scheme can be overwritten for later,
912      *  for example by mod_extforward or mod_magnet) */
913     buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);
914 
915     buffer * const target = &r->target;
916     if (r->http_method == HTTP_METHOD_CONNECT
917         || (r->http_method == HTTP_METHOD_OPTIONS
918             && target->ptr[0] == '*'
919             && target->ptr[1] == '\0')) {
920         /* CONNECT ... (or) OPTIONS * ... */
921         buffer_copy_buffer(&r->uri.path, target);
922         buffer_clear(&r->uri.query);
923         return 0;
924     }
925 
926     char *qstr;
927     if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
928         /*uint32_t len = buffer_clen(target);*/
929         int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
930         if (-2 == qs)
931             return http_request_header_line_invalid(r, 400,
932               "invalid character in URI -> 400"); /* Bad Request */
933         qstr = (-1 == qs) ? NULL : target->ptr+qs;
934       #if 0  /* future: might enable here, or below for all requests */
935         /* (Note: total header size not recalculated on HANDLER_COMEBACK
936          *  even if other request headers changed during processing)
937          * (If (0 != r->loops_per_request), then the generated
938          *  request is too large.  Should a different error be returned?) */
939         r->rqst_header_len -= len;
940         len = buffer_clen(target);
941         r->rqst_header_len += len;
942         if (len > MAX_HTTP_REQUEST_URI) {
943             return 414; /* 414 URI Too Long */
944         }
945         if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
946             log_error(r->conf.errh, __FILE__, __LINE__,
947               "request header fields too large: %u -> 431",
948               r->rqst_header_len);
949             return 431; /* Request Header Fields Too Large */
950         }
951       #endif
952     }
953     else {
954         size_t rlen = buffer_clen(target);
955         qstr = memchr(target->ptr, '#', rlen);/* discard fragment */
956         if (qstr) {
957             rlen = (size_t)(qstr - target->ptr);
958             buffer_truncate(target, rlen);
959         }
960         qstr = memchr(target->ptr, '?', rlen);
961     }
962 
963     /** extract query string from target */
964     const char * const pstr = target->ptr;
965     const uint32_t rlen = buffer_clen(target);
966     uint32_t plen;
967     if (NULL != qstr) {
968         plen = (uint32_t)(qstr - pstr);
969         buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1);
970     }
971     else {
972         plen = rlen;
973         buffer_clear(&r->uri.query);
974     }
975     buffer_copy_string_len(&r->uri.path, pstr, plen);
976 
977     /* decode url to path
978      *
979      * - decode url-encodings  (e.g. %20 -> ' ')
980      * - remove path-modifiers (e.g. /../)
981      */
982 
983     buffer_urldecode_path(&r->uri.path);
984     buffer_path_simplify(&r->uri.path);
985     if (r->uri.path.ptr[0] != '/')
986         return http_request_header_line_invalid(r, 400,
987           "uri-path does not begin with '/' -> 400"); /* Bad Request */
988 
989     return 0;
990 }
991 
992 __attribute_cold__
993 __attribute_noinline__
994 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
995     for (int i = 0; i < klen; ++i) {
996         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
997         /**
998          * 1*<any CHAR except CTLs or separators>
999          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1000          *
1001          */
1002         switch(k[i]) {
1003         case ' ':
1004         case '\t':
1005             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1006         case '\r':
1007         case '\n':
1008         case '(':
1009         case ')':
1010         case '<':
1011         case '>':
1012         case '@':
1013         case ',':
1014         case ':':
1015         case ';':
1016         case '\\':
1017         case '\"':
1018         case '/':
1019         case '[':
1020         case ']':
1021         case '?':
1022         case '=':
1023         case '{':
1024         case '}':
1025             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1026         default:
1027             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1028                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1029             break; /* ok */
1030         }
1031     }
1032     return 0;
1033 }
1034 
1035 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
1036     const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1037 
1038   #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/
1039     int i = hoff[2];
1040 
1041     if (ptr[i] == ' ' || ptr[i] == '\t') {
1042         return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400");
1043     }
1044   #endif
1045 
1046     for (int i = 2; i < hoff[0]; ++i) {
1047         const char *k = ptr + hoff[i];
1048         /* one past last line hoff[hoff[0]] is to final "\r\n" */
1049         char *end = ptr + hoff[i+1];
1050 
1051         const char *colon = memchr(k, ':', end - k);
1052         if (NULL == colon)
1053             return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");
1054 
1055         const char *v = colon + 1;
1056 
1057         /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1058          * 3.2.4.  Field Parsing
1059          * [...]
1060          * No whitespace is allowed between the header field-name and colon.  In
1061          * the past, differences in the handling of such whitespace have led to
1062          * security vulnerabilities in request routing and response handling.  A
1063          * server MUST reject any received request message that contains
1064          * whitespace between a header field-name and colon with a response code
1065          * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
1066          * response message before forwarding the message downstream.
1067          */
1068         /* (line k[-1] is always preceded by a '\n',
1069          *  including first header after request-line,
1070          *  so no need to check colon != k) */
1071         if (colon[-1] == ' ' || colon[-1] == '\t') {
1072             if (http_header_strict) {
1073                 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
1074             }
1075             else {
1076                 /* remove trailing whitespace from key(if !http_header_strict)*/
1077                 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t');
1078             }
1079         }
1080 
1081         const int klen = (int)(colon - k);
1082         if (0 == klen)
1083             return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
1084         const enum http_header_e id = http_header_hkey_get(k, klen);
1085 
1086         if (id == HTTP_HEADER_OTHER) {
1087             for (int j = 0; j < klen; ++j) {
1088                 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/
1089                 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict))
1090                     return 400;
1091                 break;
1092             }
1093         }
1094 
1095         /* remove leading whitespace from value */
1096         while (*v == ' ' || *v == '\t') ++v;
1097 
1098         for (; i+1 <= hoff[0]; ++i) {
1099             end = ptr + hoff[i+1];
1100             if (end[0] != ' ' && end[0] != '\t') break;
1101 
1102             /* line folding */
1103           #ifdef __COVERITY__
1104             force_assert(end - k >= 2);
1105           #endif
1106             if (end[-2] == '\r')
1107                 end[-2] = ' ';
1108             else if (http_header_strict)
1109                 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1110             end[-1] = ' ';
1111         }
1112       #ifdef __COVERITY__
1113         /*(buf holding k has non-zero request-line, so end[-2] valid)*/
1114         force_assert(end >= k + 2);
1115       #endif
1116         if (end[-2] == '\r')
1117             --end;
1118         else if (http_header_strict)
1119             return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1120         /* remove trailing whitespace from value (+ remove '\r\n') */
1121         /* (line k[-1] is always preceded by a '\n',
1122          *  including first header after request-line,
1123          *  so no need to check (end != k)) */
1124         do { --end; } while (end[-1] == ' ' || end[-1] == '\t');
1125 
1126         const int vlen = (int)(end - v);
1127         /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
1128         if (vlen <= 0) continue; /* ignore header */
1129 
1130         if (http_header_strict) {
1131             const char * const x = http_request_check_line_strict(v, vlen);
1132             if (x)
1133                 return http_request_header_char_invalid(r, *x,
1134                   "invalid character in header -> 400");
1135         } /* else URI already checked in http_request_parse_reqline() for any '\0' */
1136 
1137         int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen);
1138         if (0 != status) return status;
1139     }
1140 
1141     return 0;
1142 }
1143 
1144 
1145 static int
1146 http_request_parse (request_st * const restrict r, const int scheme_port)
1147 {
1148     int status = http_request_parse_target(r, scheme_port);
1149     if (0 != status) return status;
1150 
1151     /* post-processing */
1152     const unsigned int http_parseopts = r->conf.http_parseopts;
1153 
1154     /* check hostname field if it is set */
1155     /*(r->http_host might not be set until after parsing request headers)*/
1156     if (__builtin_expect( (r->http_host != NULL), 1)) {
1157         if (0 != http_request_host_policy(r->http_host,
1158                                           http_parseopts, scheme_port))
1159             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1160         buffer_copy_buffer(&r->uri.authority, r->http_host);
1161     }
1162     else {
1163         buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1164         if (r->http_version >= HTTP_VERSION_1_1)
1165             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1166     }
1167 
1168     if (HTTP_VERSION_1_1 != r->http_version
1169         && (r->rqst_htags
1170             & (light_bshift(HTTP_HEADER_UPGRADE)
1171               |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1172         return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1173     }
1174 
1175     if (0 == r->reqbody_length) {
1176         /* POST requires Content-Length (or Transfer-Encoding)
1177          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1178         if (HTTP_METHOD_POST == r->http_method
1179             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1180             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1181         }
1182     }
1183     else {
1184         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1185         if (-1 == r->reqbody_length
1186             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1187             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1188              * 3.3.3.  Message Body Length
1189              * [...]
1190              * If a message is received with both a Transfer-Encoding and a
1191              * Content-Length header field, the Transfer-Encoding overrides the
1192              * Content-Length.  Such a message might indicate an attempt to
1193              * perform request smuggling (Section 9.5) or response splitting
1194              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1195              * remove the received Content-Length field prior to forwarding such
1196              * a message downstream.
1197              */
1198             const unsigned int http_header_strict =
1199               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1200             if (http_header_strict) {
1201                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1202             }
1203             else {
1204                 /* ignore Content-Length */
1205                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1206             }
1207         }
1208         if (http_method_get_or_head(r->http_method)
1209             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1210             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1211         }
1212     }
1213 
1214     return 0;
1215 }
1216 
1217 
1218 static int
1219 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1220 {
1221     /*
1222      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1223      * Header : "^([-a-zA-Z]+): (.+)$"
1224      * End    : "^$"
1225      */
1226 
1227     int status;
1228     const unsigned int http_parseopts = r->conf.http_parseopts;
1229 
1230     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1231     if (0 != status) return status;
1232 
1233     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1234     if (0 != status) return status;
1235 
1236     return http_request_parse(r, scheme_port);
1237 }
1238 
1239 
1240 static void
1241 http_request_headers_fin (request_st * const restrict r)
1242 {
1243     if (0 == r->http_status) {
1244       #if 0
1245         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1246                                 | (1 << COMP_HTTP_SCHEME)
1247                                 | (1 << COMP_HTTP_HOST)
1248                                 | (1 << COMP_HTTP_REMOTE_IP)
1249                                 | (1 << COMP_HTTP_REQUEST_METHOD)
1250                                 | (1 << COMP_HTTP_URL)
1251                                 | (1 << COMP_HTTP_QUERY_STRING)
1252                                 | (1 << COMP_HTTP_REQUEST_HEADER);
1253       #else
1254         /* all config conditions are valid after parsing header
1255          * (set all bits; remove dependency on plugin_config.h) */
1256         r->conditional_is_valid = ~0u;
1257       #endif
1258     }
1259     else {
1260         r->keep_alive = 0;
1261         r->reqbody_length = 0;
1262     }
1263 }
1264 
1265 
1266 void
1267 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1268 {
1269     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1270 
1271     http_request_headers_fin(r);
1272 
1273     if (__builtin_expect( (0 != r->http_status), 0)) {
1274         if (r->conf.log_request_header_on_error) {
1275             /*(http_request_parse_headers() modifies hdrs only to
1276              * undo line-wrapping in-place using spaces)*/
1277             log_error_multiline(r->conf.errh, __FILE__, __LINE__,
1278                                 hdrs, r->rqst_header_len, "rqst: ");
1279         }
1280     }
1281 }
1282 
1283 
1284 void
1285 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1286 {
1287     if (0 == r->http_status)
1288         r->http_status = http_request_parse(r, scheme_port);
1289 
1290     if (0 == r->http_status) {
1291         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1292             r->http_status = http_request_header_line_invalid(r, 400,
1293               "invalid Connection header with HTTP/2 -> 400");
1294     }
1295 
1296     http_request_headers_fin(r);
1297 
1298     /* limited; headers not collected into a single buf for HTTP/2 */
1299     if (__builtin_expect( (0 != r->http_status), 0)) {
1300         if (r->conf.log_request_header_on_error) {
1301             log_error(r->conf.errh, __FILE__, __LINE__,
1302               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1303               r->http_host ? r->http_host->ptr : "",
1304               http_method_buf(r->http_method)->ptr,
1305               !buffer_is_blank(&r->target) ? r->target.ptr : "");
1306         }
1307     }
1308 
1309     /* ignore Upgrade if using HTTP/2 */
1310     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1311         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1312                                   CONST_STR_LEN("upgrade"));
1313     /* XXX: should filter out other hop-by-hop connection headers, too */
1314 }
1315