xref: /lighttpd1.4/src/request.c (revision e8a6ed6e)
1 /*
2  * request - HTTP request processing
3  *
4  * Fully-rewritten from original
5  * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
6  * License: BSD 3-clause (same as lighttpd)
7  */
8 #include "first.h"
9 
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16 
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 static int request_check_hostname(buffer * const host) {
23     /*
24      *       hostport      = host [ ":" port ]
25      *       host          = hostname | IPv4address | IPv6address
26      *       hostname      = *( domainlabel "." ) toplabel [ "." ]
27      *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
28      *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
29      *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
30      *       IPv6address   = "[" ... "]"
31      *       port          = *digit
32      */
33 
34     const char *h = host->ptr;
35 
36     if (*h != '[') {
37         uint32_t len = buffer_clen(host);
38         const char * const colon = memchr(h, ':', len);
39         uint32_t hlen = colon ? (uint32_t)(colon - h) : len;
40 
41         /* if hostname ends in ".", strip it */
42         if (__builtin_expect( (0 == hlen), 0)) return -1;
43         if (__builtin_expect( (h[hlen-1] == '.'), 0)) {
44             /* shift port info one left */
45             if (--hlen == 0) return -1;
46             --len;
47             if (NULL != colon)
48                 memmove(host->ptr+hlen, colon, len - hlen);
49             buffer_truncate(host, len);
50         }
51 
52         int label_len = 0;
53         int allnumeric = 1;
54         int numeric = 1;
55         int level = 0;
56         for (uint32_t i = 0; i < hlen; ++i) {
57             const int ch = h[i];
58             ++label_len;
59             if (light_isdigit(ch))
60                 continue;
61             else if ((light_isalpha(ch) || (ch == '-' && i != 0)))
62                 numeric = 0;
63             else if (ch == '.' && 1 != label_len && '-' != h[i+1]) {
64                 allnumeric &= numeric;
65                 numeric = 1;
66                 label_len = 0;
67                 ++level;
68             }
69             else
70                 return -1;
71         }
72         /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
73         if (0 == label_len || (numeric && (level != 3 || !allnumeric)))
74             return -1;
75 
76         h += hlen;
77     }
78     else {  /* IPv6 address */
79         /* check the address inside [...]; note: not fully validating */
80         /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
81         ++h; /* step past '[' */
82         int cnt = 0;
83         while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h;
84         /*(invalid char, too many ':', missing ']', or empty "[]")*/
85         if (*h != ']' || h - host->ptr == 1) return -1;
86         ++h; /* step past ']' */
87     }
88 
89     /* check numerical port, if present */
90     if (*h == ':') {
91         if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/
92             buffer_truncate(host, h - host->ptr);
93         do { ++h; } while (light_isdigit(*h));
94     }
95 
96     return (*h == '\0') ? 0 : -1;
97 }
98 
99 int http_request_host_normalize(buffer * const b, const int scheme_port) {
100     /*
101      * check for and canonicalize numeric IP address and portnum (optional)
102      * (IP address may be followed by ":portnum" (optional))
103      * - IPv6: "[...]"
104      * - IPv4: "x.x.x.x"
105      * - IPv4: 12345678   (32-bit decimal number)
106      * - IPv4: 012345678  (32-bit octal number)
107      * - IPv4: 0x12345678 (32-bit hex number)
108      *
109      * allow any chars (except ':' and '\0' and stray '[' or ']')
110      *   (other code may check chars more strictly or more pedantically)
111      * ':'  delimits (optional) port at end of string
112      * "[]" wraps IPv6 address literal
113      * '\0' should have been rejected earlier were it present
114      *
115      * any chars includes, but is not limited to:
116      * - allow '-' any where, even at beginning of word
117      *     (security caution: might be confused for cmd flag if passed to shell)
118      * - allow all-digit TLDs
119      *     (might be mistaken for IPv4 addr by inet_aton()
120      *      unless non-digits appear in subdomain)
121      */
122 
123     /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
124      * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
125      * Not using inet_pton() (when available) on IPv4 for similar reasons. */
126 
127     const char * const p = b->ptr;
128     const size_t blen = buffer_clen(b);
129     long port = 0;
130 
131     if (*p != '[') {
132         char * const colon = (char *)memchr(p, ':', blen);
133         if (colon) {
134             if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
135             if (colon[1] != '\0') {
136                 char *e;
137                 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
138                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
139                     /* valid port */
140                 } else {
141                     return -1;
142                 }
143             } /*(else ignore stray colon at string end)*/
144             buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
145         }
146 
147         if (light_isdigit(*p)) do {
148             /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
149             /* (check one-element cache of normalized IPv4 address string) */
150             static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
151             size_t n = colon ? (size_t)(colon - p) : blen;
152             sock_addr addr;
153             if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
154             if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
155                 sock_addr_inet_ntop_copy_buffer(b, &addr);
156                 n = buffer_clen(b);
157                 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
158             }
159         } while (0);
160     } else do { /* IPv6 addr */
161       #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
162 
163         /* (check one-element cache of normalized IPv4 address string) */
164         static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
165         sock_addr addr;
166         char *bracket = b->ptr+blen-1;
167         char *percent = strchr(b->ptr+1, '%');
168         size_t len;
169         int rc;
170         char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
171         if (blen <= 2) return -1; /*(invalid "[]")*/
172         if (*bracket != ']') {
173             bracket = (char *)memchr(b->ptr+1, ']', blen-1);
174             if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
175                return -1;
176             }
177             if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
178                 char *e;
179                 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
180                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
181                     /* valid port */
182                 } else {
183                     return -1;
184                 }
185             }
186         }
187 
188         len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
189         if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
190             /* truncate after ']' and re-add normalized port, if needed */
191             buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
192             break;
193         }
194 
195         *bracket = '\0';/*(terminate IPv6 string)*/
196         if (percent) *percent = '\0'; /*(remove %interface from address)*/
197         rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
198         if (percent) *percent = '%'; /*(restore %interface)*/
199         *bracket = ']'; /*(restore bracket)*/
200         if (1 != rc) return -1;
201 
202         sock_addr_inet_ntop(&addr, buf, sizeof(buf));
203         len = strlen(buf);
204         if (percent) {
205             if (percent > bracket) return -1;
206             if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
207             if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
208             memcpy(buf+len, percent, (size_t)(bracket - percent));
209             len += (size_t)(bracket - percent);
210         }
211         buffer_truncate(b, 1); /* truncate after '[' */
212         buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));
213 
214       #else
215 
216         return -1;
217 
218       #endif
219     } while (0);
220 
221     if (0 != port && port != scheme_port) {
222         buffer_append_string_len(b, CONST_STR_LEN(":"));
223         buffer_append_int(b, (int)port);
224     }
225 
226     return 0;
227 }
228 
229 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
230     /* caller should lowercase, as is done in http_request_header_set_Host(),
231      * for consistency in case the value is used prior to calling policy func */
232     /*buffer_to_lower(b);*/
233     return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
234              && 0 != request_check_hostname(b))
235             || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
236                 && 0 != http_request_host_normalize(b, scheme_port)));
237 }
238 
__attribute_const__
static int request_uri_is_valid_char(const unsigned char c) {
    /* invalid: 0..32 (controls and space), 127 (DEL), and 255 */
    if (c <= 32)
        return 0;
    return !(c == 127 || c == 255);
}
243 
244 __attribute_cold__
245 __attribute_noinline__
246 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
247     if (r->conf.log_request_header_on_error) {
248         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
249     }
250     return status;
251 }
252 
253 __attribute_cold__
254 __attribute_noinline__
255 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
256     if (r->conf.log_request_header_on_error) {
257         if ((unsigned char)ch > 32 && ch != 127) {
258             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
259         }
260         else {
261             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
262         }
263     }
264     return 400;
265 }
266 
267 
268 __attribute_noinline__
269 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
270 {
271     r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
272                                                CONST_STR_LEN("Host"));
273     buffer_copy_string_len_lc(r->http_host, h, hlen);
274 }
275 
276 
/* Restricted base-10 parse of at most vlen bytes of v into an int64_t.
 *
 * Accepts only the digits 0-9: no sign, no whitespace, no prefix.
 * Parsing stops at the first non-digit or when the next digit would make
 * the value exceed INT64_MAX.  errno is not set.
 *
 * *err receives the position where parsing stopped; the parse succeeded
 * over the whole input iff *err == v+vlen upon return.
 * (an empty input yields 0 with *err == v; caller decides if that is an error)
 */
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    const unsigned char * const start = (const unsigned char *)v;
    const unsigned char * const end = start + vlen;
    const unsigned char *cur = start;
    int64_t acc = 0;

    while (cur != end) {
        /*(unsigned arithmetic; bytes below '0' wrap to a large value)*/
        const unsigned int digit = (unsigned int)*cur - '0';
        if (digit > 9) break;                       /* not a digit */
        if (acc > INT64_MAX/10) break;              /* acc*10 would overflow */
        acc *= 10;
        if (acc > INT64_MAX - (int64_t)digit) break;/* acc+digit would overflow */
        acc += (int64_t)digit;
        ++cur;
    }

    *err = v + (cur - start);
    return acc;
}
297 
298 
299 __attribute_cold__
300 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
301     /* Proxies sometimes send dup headers
302      * if they are the same we ignore the second
303      * if not, we raise an error */
304     const buffer * const vb = http_header_request_get(r, id, k, klen);
305     if (vb && buffer_eq_icase_slen(vb, v, vlen))
306         return 0; /* ignore header; matches existing header */
307 
308     const char *errmsg;
309     switch (id) {
310       case HTTP_HEADER_HOST:
311         errmsg = "duplicate Host header -> 400";
312         break;
313       case HTTP_HEADER_CONTENT_TYPE:
314         errmsg = "duplicate Content-Type header -> 400";
315         break;
316       case HTTP_HEADER_IF_MODIFIED_SINCE:
317         errmsg = "duplicate If-Modified-Since header -> 400";
318         break;
319       default:
320         errmsg = "duplicate header -> 400";
321         break;
322     }
323     return http_request_header_line_invalid(r, 400, errmsg);
324 }
325 
326 
327 /* add header to list of headers
328  * certain headers are also parsed
329  * might drop a header if deemed unnecessary/broken
330  *
331  * returns 0 on success, HTTP status on error
332  */
333 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
334     /*
335      * Note: k might not be '\0'-terminated
336      * Note: v is not '\0'-terminated
337      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
338      *     (one of '\r' '\n' ' ' '\t')
339      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
340      *     (care must be taken to avoid libc funcs which expect z-strings)
341      */
342     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
343 
344     switch (id) {
345       /*case HTTP_HEADER_OTHER:*/
346       default:
347         break;
348       case HTTP_HEADER_HOST:
349         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
350             if (vlen >= 1024) { /*(expecting < 256)*/
351                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
352             }
353             /*(http_request_header_append() plus sets r->http_host)*/
354             http_request_header_set_Host(r, v, vlen);
355             return 0;
356         }
357         else if (NULL != r->http_host
358                  && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
359             /* ignore all Host: headers if match authority in request line */
360             /* (expect Host to match case in :authority of HTTP/2 request) */
361             return 0; /* ignore header */
362         }
363         else {
364             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
365         }
366         break;
367       case HTTP_HEADER_CONNECTION:
368         /* "Connection: close" is common case if header is present */
369         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
370             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
371             r->keep_alive = 0;
372             break;
373         }
374         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
375             r->keep_alive = 1;
376             break;
377         }
378         break;
379       case HTTP_HEADER_CONTENT_TYPE:
380         if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) {
381             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
382         }
383         break;
384       case HTTP_HEADER_IF_NONE_MATCH:
385         /* if dup, only the first one will survive */
386         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) {
387             return 0; /* ignore header */
388         }
389         break;
390       case HTTP_HEADER_CONTENT_LENGTH:
391         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
392             /*(trailing whitespace was removed from vlen)*/
393             /*(not using strtoll() since v might not be z-string)*/
394             const char *err;
395             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
396             if (err == v+vlen) {
397                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
398                 if (0 == r->reqbody_length) r->reqbody_length = clen;
399             }
400             else {
401                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
402             }
403         }
404         else {
405             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
406         }
407         break;
408       case HTTP_HEADER_HTTP2_SETTINGS:
409         if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) {
410             return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400");
411         }
412         break;
413       case HTTP_HEADER_IF_MODIFIED_SINCE:
414         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) {
415             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
416         }
417         break;
418       case HTTP_HEADER_TRANSFER_ENCODING:
419         if (HTTP_VERSION_1_1 != r->http_version) {
420             return http_request_header_line_invalid(r, 400,
421               HTTP_VERSION_1_0 == r->http_version
422                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
423                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
424         }
425 
426         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
427             /* Transfer-Encoding might contain additional encodings,
428              * which are not currently supported by lighttpd */
429             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
430         }
431         r->reqbody_length = -1;
432 
433         /* Transfer-Encoding is a hop-by-hop header,
434          * which must not be blindly forwarded to backends */
435         return 0; /* skip header */
436     }
437 
438     http_header_request_append(r, id, k, klen, v, vlen);
439     return 0;
440 }
441 
442 __attribute_cold__
443 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
444     const char * proto = memchr(ptr, ' ', len);
445     if (NULL == proto)
446         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
447     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
448     if (NULL == proto)
449         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
450     ++proto;
451 
452     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
453         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
454             /* length already checked before calling this routine */
455             /* (len != (size_t)(proto - ptr + 8)) */
456             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
457                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
458             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
459         }
460         else
461             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
462     }
463     else
464         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
465 
466     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
467     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
468 
469     return 0;
470 }
471 
472 __attribute_cold__
473 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
474     const char *nuri;
475     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
476         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
477        ||
478        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
479         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
480         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
481         const size_t hostlen = nuri - host;
482         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
483             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
484             return NULL;
485         }
486         /* Insert as "Host" header */
487         http_request_header_set_Host(r, host, hostlen);
488         return nuri;
489     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
490            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
491            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
492         /* (permitted) */
493         return uri;
494     } else {
495         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
496         return NULL;
497     }
498 }
499 
500 
501 __attribute_cold__
502 __attribute_noinline__
503 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
504 
505 
506 int
507 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
508 {
509     /* :method is required to indicate method
510      * CONNECT method must have :method and :authority
511      * All other methods must have at least :method :scheme :path */
512 
513     if (HTTP_METHOD_UNSET == r->http_method)
514         return http_request_header_line_invalid(r, 400,
515           "missing pseudo-header method -> 400");
516 
517     if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) {
518         if (!scheme)
519             return http_request_header_line_invalid(r, 400,
520               "missing pseudo-header scheme -> 400");
521 
522         if (buffer_is_blank(&r->target))
523             return http_request_header_line_invalid(r, 400,
524               "missing pseudo-header path -> 400");
525 
526         const char * const uri = r->target.ptr;
527         if (*uri != '/') { /* (common case: (*uri == '/')) */
528             if (uri[0] != '*' || uri[1] != '\0'
529                 || HTTP_METHOD_OPTIONS != r->http_method)
530                 return http_request_header_line_invalid(r, 400,
531                   "invalid pseudo-header path -> 400");
532         }
533     }
534     else { /* HTTP_METHOD_CONNECT */
535         if (NULL == r->http_host)
536             return http_request_header_line_invalid(r, 400,
537               "missing pseudo-header authority -> 400");
538         if (!buffer_is_blank(&r->target) || scheme)
539             return http_request_header_line_invalid(r, 400,
540               "invalid pseudo-header with CONNECT -> 400");
541         /* note: this copy occurs prior to http_request_host_policy()
542          * so any consumer handling CONNECT should normalize r->target
543          * as appropriate */
544         buffer_copy_buffer(&r->target, r->http_host);
545     }
546     buffer_copy_buffer(&r->target_orig, &r->target);
547 
548     /* r->http_host, if set, is checked with http_request_host_policy()
549      * in http_request_parse() */
550 
551     /* copied and modified from end of http_request_parse_reqline() */
552 
553     /* check uri for invalid characters */
554     const unsigned int http_header_strict =
555       (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
556     const uint32_t ulen = buffer_clen(&r->target);
557     const uint8_t * const uri = (uint8_t *)r->target.ptr;
558     if (http_header_strict) {
559         if (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
560             return 0; /* URI will be checked in http_request_parse_target() */
561         for (uint32_t i = 0; i < ulen; ++i) {
562             if (!request_uri_is_valid_char(uri[i]))
563                 return http_request_header_char_invalid(r, uri[i],
564                   "invalid character in URI -> 400");
565         }
566     }
567     else {
568         if (NULL != memchr(uri, '\0', ulen))
569             return http_request_header_char_invalid(r, '\0',
570               "invalid character in header -> 400");
571     }
572 
573     return 0;
574 }
575 
576 
577 int
578 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
579 {
580     /* Note: k and v might not be '\0' terminated strings;
581      * care must be taken to avoid libc funcs which expect z-strings */
582     const char * const restrict k = hpctx->k;
583     const char * const restrict v = hpctx->v;
584     const uint32_t klen = hpctx->klen;
585     const uint32_t vlen = hpctx->vlen;
586 
587     if (0 == klen)
588         return http_request_header_line_invalid(r, 400,
589           "invalid header key -> 400");
590 
591     if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
592         /*(configurable with server.max-request-field-size; default 8k)*/
593       #if 1 /* emit to error log for people sending large headers */
594         log_error(r->conf.errh, __FILE__, __LINE__,
595                   "oversized request header -> 431");
596         return 431; /* Request Header Fields Too Large */
597       #else
598         /* 431 Request Header Fields Too Large */
599         return http_request_header_line_invalid(r, 431,
600           "oversized request header -> 431");
601       #endif
602     }
603 
604     if (!hpctx->trailers) {
605         if (*k == ':') {
606             /* HTTP/2 request pseudo-header fields */
607             if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
608                 return http_request_header_line_invalid(r, 400,
609                   "invalid pseudo-header -> 400");
610             if (0 == vlen)
611                 return http_request_header_line_invalid(r, 400,
612                   "invalid header value -> 400");
613 
614             /* (note: relies on implementation details using ls-hpack in h2.c)
615              * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
616              *  matches key, not also value, if lsxpack_header_t flags does not
617              *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
618              *  below indicates any method, not only "GET") */
619             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
620                 switch (klen-1) {
621                   case 4:
622                     if (0 == memcmp(k+1, "path", 4))
623                         hpctx->id = HTTP_HEADER_H2_PATH;
624                     break;
625                   case 6:
626                     if (0 == memcmp(k+1, "method", 6))
627                         hpctx->id = HTTP_HEADER_H2_METHOD_GET;
628                     else if (0 == memcmp(k+1, "scheme", 6))
629                         hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
630                     break;
631                   case 9:
632                     if (0 == memcmp(k+1, "authority", 9))
633                         hpctx->id = HTTP_HEADER_H2_AUTHORITY;
634                     break;
635                   default:
636                     break;
637                 }
638                 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
639                     return http_request_header_line_invalid(r, 400,
640                       "invalid pseudo-header -> 400");
641             }
642 
643             switch (hpctx->id) {
644               case HTTP_HEADER_H2_AUTHORITY:
645                 if (__builtin_expect( (r->http_host != NULL), 0))
646                     break;
647                 if (vlen >= 1024) /*(expecting < 256)*/
648                     return http_request_header_line_invalid(r, 400,
649                       "invalid pseudo-header authority too long -> 400");
650                 /* insert as "Host" header */
651                 http_request_header_set_Host(r, v, vlen);
652                 return 0;
653               case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
654               case HTTP_HEADER_H2_METHOD_POST:
655                 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
656                     break;
657                 r->http_method = get_http_method_key(v, vlen);
658                 if (HTTP_METHOD_UNSET >= r->http_method)
659                     return http_request_header_line_invalid(r, 501,
660                       "unknown http-method -> 501");
661                 return 0;
662               case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
663               case HTTP_HEADER_H2_PATH_INDEX_HTML:
664                 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
665                     break;
666                 buffer_copy_string_len(&r->target, v, vlen);
667                 return 0;
668               case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
669               case HTTP_HEADER_H2_SCHEME_HTTPS:
670                 if (__builtin_expect( (hpctx->scheme), 0))
671                     break;
672                 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
673                 return 0;
674                #if 0
675                 switch (vlen) {/*(validated, but then ignored)*/
676                   case 5: /* "https" */
677                     if (v[4]!='s') break;
678                     __attribute_fallthrough__
679                   case 4: /* "http" */
680                     if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
681                         hpctx->scheme = 1;
682                         return 0;
683                     }
684                     break;
685                   default:
686                     break;
687                 }
688                 return http_request_header_line_invalid(r, 400,
689                   "unknown pseudo-header scheme -> 400");
690                #endif
691               default:
692                 return http_request_header_line_invalid(r, 400,
693                   "invalid pseudo-header -> 400");
694             }
695             return http_request_header_line_invalid(r, 400,
696               "repeated pseudo-header -> 400");
697         }
698         else { /*(non-pseudo headers)*/
699             if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
700                 hpctx->pseudo = 0;
701                 int status =
702                   http_request_validate_pseudohdrs(r, hpctx->scheme,
703                                                    hpctx->http_parseopts);
704                 if (0 != status) return status;
705             }
706             if (0 == vlen)
707                 return 0;
708 
709             const unsigned int http_header_strict =
710               (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
711 
712             if (http_header_strict) {
713                 for (uint32_t j = 0; j < vlen; ++j) {
714                     if ((((uint8_t *)v)[j] < 32 && v[j] != '\t') || v[j]==127)
715                         return http_request_header_char_invalid(r, v[j],
716                           "invalid character in header -> 400");
717                 }
718             }
719             else {
720                 if (NULL != memchr(v, '\0', vlen))
721                     return http_request_header_char_invalid(r, '\0',
722                       "invalid character in header -> 400");
723             }
724 
725             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
726                 uint32_t j = 0;
727                 while (j < klen && (light_islower(k[j]) || k[j] == '-'))
728                     ++j;
729 
730                 if (__builtin_expect( (j != klen), 0)) {
731                     if (light_isupper(k[j]))
732                         return 400;
733                     if (0 != http_request_parse_header_other(r, k+j, klen-j,
734                                                             http_header_strict))
735                         return 400;
736                 }
737 
738                 hpctx->id = http_header_hkey_get_lc(k, klen);
739             }
740 
741             const enum http_header_e id = (enum http_header_e)hpctx->id;
742 
743             if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
744                 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
745                 return http_request_header_line_invalid(r, 400,
746                   "invalid TE header value with HTTP/2 -> 400");
747 
748             return http_request_parse_single_header(r, id, k, klen, v, vlen);
749         }
750     }
751     else { /*(trailers)*/
752         if (*k == ':')
753             return http_request_header_line_invalid(r, 400,
754               "invalid pseudo-header in trailers -> 400");
755         /* ignore trailers (after required HPACK decoding) if streaming
756          * request body to backend since headers have already been sent
757          * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
758          * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
759          * support using HTTP/2 to connect to backends) */
760       #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
761         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
762             return 0;
763       #endif
764         /* Note: do not unconditionally merge into headers since if
765          * headers had already been sent to backend, then mod_accesslog
766          * logging of request headers might be inaccurate.
767          * Many simple backends do not support HTTP/1.1 requests sending
768          * Transfer-Encoding: chunked, and even those that do might not
769          * handle trailers.  Some backends do not even support HTTP/1.1.
770          * For all these reasons, ignore trailers if streaming request
771          * body to backend.  Revisit in future if adding support for
772          * connecting to backends using HTTP/2 (with explicit config
773          * option to force connecting to backends using HTTP/2) */
774 
775         /* XXX: TODO: request trailers not handled if streaming reqbody
776          * XXX: must ensure that trailers are not disallowed field-names
777          */
778 
779       #if 0
780         if (0 == vlen)
781             return 0;
782       #endif
783 
784         return 0;
785     }
786 }
787 
788 
789 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
790     size_t len = hoff[2];
791 
792     /* parse the first line of the request
793      * <method> <uri> <protocol>\r\n
794      * */
795     if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
796         return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
797     if (ptr[len-2] == '\r')
798         len-=2;
799     else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
800         len-=1;
801     else
802         return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
803 
804     /*
805      * RFC7230:
806      *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
807      *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
808      */
809 
810     /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
811     union proto_un {
812       char c[8];
813       uint64_t u;
814     };
815     static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
816     static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
817     const char *p = ptr + len - 8;
818     union proto_un proto8;
819     proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
820     proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
821     if (p[-1] == ' ' && http_1_1.u == proto8.u) {
822         r->http_version = HTTP_VERSION_1_1;
823         r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
824     }
825     else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
826         r->http_version = HTTP_VERSION_1_0;
827         r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
828     }
829     else {
830         int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
831         if (0 != status) return status;
832         /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
833         for (p = ptr + len - 9; p[-1] != ' '; --p) ;
834     }
835 
836     /* method is expected to be a short string in the general case */
837     size_t i = 0;
838     while (ptr[i] != ' ') ++i;
839   #if 0 /*(space must exist if protocol was parsed successfully)*/
840     while (i < len && ptr[i] != ' ') ++i;
841     if (ptr[i] != ' ')
842         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
843   #endif
844 
845     r->http_method = get_http_method_key(ptr, i);
846     if (HTTP_METHOD_UNSET >= r->http_method)
847         return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");
848 
849     const char *uri = ptr + i + 1;
850 
851     if (uri == p)
852         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
853     len = (size_t)(p - uri - 1);
854 
855     if (*uri != '/') { /* (common case: (*uri == '/')) */
856         uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
857         if (NULL == uri) return 400;
858         len = (size_t)(p - uri - 1);
859     }
860 
861     if (0 == len)
862         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
863 
864     /* check uri for invalid characters */
865     if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) { /* http_header_strict */
866         if ((http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) {
867             /* URI will be checked in http_request_parse_target() */
868         }
869         else {
870             for (i = 0; i < len; ++i) {
871                 if (!request_uri_is_valid_char(uri[i]))
872                     return http_request_header_char_invalid(r, uri[i], "invalid character in URI -> 400");
873             }
874         }
875     }
876     else {
877         /* check entire set of request headers for '\0' */
878         if (NULL != memchr(ptr, '\0', hoff[hoff[0]]))
879             return http_request_header_char_invalid(r, '\0', "invalid character in header -> 400");
880     }
881 
882     buffer_copy_string_len(&r->target, uri, len);
883     buffer_copy_string_len(&r->target_orig, uri, len);
884     return 0;
885 }
886 
887 int http_request_parse_target(request_st * const r, int scheme_port) {
888     /* URI is parsed into components at start of request and may
889      * also be re-parsed upon HANDLER_COMEBACK during the request
890      * r->target is expected to be a "/url-part?query-part"
891      *   (and *not* a fully-qualified URI starting https://...)
892      * r->uri.authority is expected to be parsed elsewhere into r->http_host
893      */
894 
895     /**
896      * prepare strings
897      *
898      * - uri.path
899      * - uri.query
900      *
901      */
902 
903     /**
904      * Name according to RFC 2396
905      *
906      * - scheme
907      * - authority
908      * - path
909      * - query
910      *
911      * (scheme)://(authority)(path)?(query)#fragment
912      *
913      */
914 
915     /* take initial scheme value from connection-level state
916      * (request r->uri.scheme can be overwritten for later,
917      *  for example by mod_extforward or mod_magnet) */
918     buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);
919 
920     buffer * const target = &r->target;
921     if (r->http_method == HTTP_METHOD_CONNECT
922         || (r->http_method == HTTP_METHOD_OPTIONS
923             && target->ptr[0] == '*'
924             && target->ptr[1] == '\0')) {
925         /* CONNECT ... (or) OPTIONS * ... */
926         buffer_copy_buffer(&r->uri.path, target);
927         buffer_clear(&r->uri.query);
928         return 0;
929     }
930 
931     char *qstr;
932     if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
933         /*uint32_t len = buffer_clen(target);*/
934         int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
935         if (-2 == qs)
936             return http_request_header_line_invalid(r, 400,
937               "invalid character in URI -> 400"); /* Bad Request */
938         qstr = (-1 == qs) ? NULL : target->ptr+qs;
939       #if 0  /* future: might enable here, or below for all requests */
940         /* (Note: total header size not recalculated on HANDLER_COMEBACK
941          *  even if other request headers changed during processing)
942          * (If (0 != r->loops_per_request), then the generated
943          *  request is too large.  Should a different error be returned?) */
944         r->rqst_header_len -= len;
945         len = buffer_clen(target);
946         r->rqst_header_len += len;
947         if (len > MAX_HTTP_REQUEST_URI) {
948             return 414; /* 414 URI Too Long */
949         }
950         if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
951             log_error(r->conf.errh, __FILE__, __LINE__,
952               "request header fields too large: %u -> 431",
953               r->rqst_header_len);
954             return 431; /* Request Header Fields Too Large */
955         }
956       #endif
957     }
958     else {
959         size_t rlen = buffer_clen(target);
960         qstr = memchr(target->ptr, '#', rlen);/* discard fragment */
961         if (qstr) {
962             rlen = (size_t)(qstr - target->ptr);
963             buffer_truncate(target, rlen);
964         }
965         qstr = memchr(target->ptr, '?', rlen);
966     }
967 
968     /** extract query string from target */
969     const char * const pstr = target->ptr;
970     const uint32_t rlen = buffer_clen(target);
971     uint32_t plen;
972     if (NULL != qstr) {
973         plen = (uint32_t)(qstr - pstr);
974         buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1);
975     }
976     else {
977         plen = rlen;
978         buffer_clear(&r->uri.query);
979     }
980     buffer_copy_string_len(&r->uri.path, pstr, plen);
981 
982     /* decode url to path
983      *
984      * - decode url-encodings  (e.g. %20 -> ' ')
985      * - remove path-modifiers (e.g. /../)
986      */
987 
988     buffer_urldecode_path(&r->uri.path);
989     buffer_path_simplify(&r->uri.path);
990     if (r->uri.path.ptr[0] != '/')
991         return http_request_header_line_invalid(r, 400,
992           "uri-path does not begin with '/' -> 400"); /* Bad Request */
993 
994     return 0;
995 }
996 
997 __attribute_cold__
998 __attribute_noinline__
999 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
1000     for (int i = 0; i < klen; ++i) {
1001         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1002         /**
1003          * 1*<any CHAR except CTLs or separators>
1004          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1005          *
1006          */
1007         switch(k[i]) {
1008         case ' ':
1009         case '\t':
1010             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1011         case '(':
1012         case ')':
1013         case '<':
1014         case '>':
1015         case '@':
1016         case ',':
1017         case ';':
1018         case '\\':
1019         case '\"':
1020         case '/':
1021         case '[':
1022         case ']':
1023         case '?':
1024         case '=':
1025         case '{':
1026         case '}':
1027             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1028         default:
1029             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1030                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1031             break; /* ok */
1032         }
1033     }
1034     return 0;
1035 }
1036 
1037 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
1038     const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1039 
1040   #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/
1041     int i = hoff[2];
1042 
1043     if (ptr[i] == ' ' || ptr[i] == '\t') {
1044         return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400");
1045     }
1046   #endif
1047 
1048     for (int i = 2; i < hoff[0]; ++i) {
1049         const char *k = ptr + hoff[i];
1050         /* one past last line hoff[hoff[0]] is to final "\r\n" */
1051         char *end = ptr + hoff[i+1];
1052 
1053         const char *colon = memchr(k, ':', end - k);
1054         if (NULL == colon)
1055             return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");
1056 
1057         const char *v = colon + 1;
1058 
1059         /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1060          * 3.2.4.  Field Parsing
1061          * [...]
1062          * No whitespace is allowed between the header field-name and colon.  In
1063          * the past, differences in the handling of such whitespace have led to
1064          * security vulnerabilities in request routing and response handling.  A
1065          * server MUST reject any received request message that contains
1066          * whitespace between a header field-name and colon with a response code
1067          * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
1068          * response message before forwarding the message downstream.
1069          */
1070         /* (line k[-1] is always preceded by a '\n',
1071          *  including first header after request-line,
1072          *  so no need to check colon != k) */
1073         if (colon[-1] == ' ' || colon[-1] == '\t') {
1074             if (http_header_strict) {
1075                 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
1076             }
1077             else {
1078                 /* remove trailing whitespace from key(if !http_header_strict)*/
1079                 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t');
1080             }
1081         }
1082 
1083         const int klen = (int)(colon - k);
1084         if (0 == klen)
1085             return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
1086         const enum http_header_e id = http_header_hkey_get(k, klen);
1087 
1088         if (id == HTTP_HEADER_OTHER) {
1089             for (int j = 0; j < klen; ++j) {
1090                 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/
1091                 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict))
1092                     return 400;
1093                 break;
1094             }
1095         }
1096 
1097         /* remove leading whitespace from value */
1098         while (*v == ' ' || *v == '\t') ++v;
1099 
1100         for (; i+1 <= hoff[0]; ++i) {
1101             end = ptr + hoff[i+1];
1102             if (end[0] != ' ' && end[0] != '\t') break;
1103 
1104             /* line folding */
1105           #ifdef __COVERITY__
1106             force_assert(end - k >= 2);
1107           #endif
1108             if (end[-2] == '\r')
1109                 end[-2] = ' ';
1110             else if (http_header_strict)
1111                 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1112             end[-1] = ' ';
1113         }
1114       #ifdef __COVERITY__
1115         /*(buf holding k has non-zero request-line, so end[-2] valid)*/
1116         force_assert(end >= k + 2);
1117       #endif
1118         if (end[-2] == '\r')
1119             --end;
1120         else if (http_header_strict)
1121             return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1122         /* remove trailing whitespace from value (+ remove '\r\n') */
1123         /* (line k[-1] is always preceded by a '\n',
1124          *  including first header after request-line,
1125          *  so no need to check (end != k)) */
1126         do { --end; } while (end[-1] == ' ' || end[-1] == '\t');
1127 
1128         const int vlen = (int)(end - v);
1129         /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
1130         if (vlen <= 0) continue; /* ignore header */
1131 
1132         if (http_header_strict) {
1133             for (int j = 0; j < vlen; ++j) {
1134                 if ((((unsigned char *)v)[j] < 32 && v[j] != '\t') || v[j]==127)
1135                     return http_request_header_char_invalid(r, v[j], "invalid character in header -> 400");
1136             }
1137         } /* else URI already checked in http_request_parse_reqline() for any '\0' */
1138 
1139         int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen);
1140         if (0 != status) return status;
1141     }
1142 
1143     return 0;
1144 }
1145 
1146 
1147 static int
1148 http_request_parse (request_st * const restrict r, const int scheme_port)
1149 {
1150     int status = http_request_parse_target(r, scheme_port);
1151     if (0 != status) return status;
1152 
1153     /* post-processing */
1154     const unsigned int http_parseopts = r->conf.http_parseopts;
1155 
1156     /* check hostname field if it is set */
1157     /*(r->http_host might not be set until after parsing request headers)*/
1158     if (__builtin_expect( (r->http_host != NULL), 1)) {
1159         if (0 != http_request_host_policy(r->http_host,
1160                                           http_parseopts, scheme_port))
1161             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1162         buffer_copy_buffer(&r->uri.authority, r->http_host);
1163     }
1164     else {
1165         buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1166         if (r->http_version >= HTTP_VERSION_1_1)
1167             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1168     }
1169 
1170     if (HTTP_VERSION_1_1 != r->http_version
1171         && (r->rqst_htags
1172             & (light_bshift(HTTP_HEADER_UPGRADE)
1173               |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1174         return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1175     }
1176 
1177     if (0 == r->reqbody_length) {
1178         /* POST requires Content-Length (or Transfer-Encoding)
1179          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1180         if (HTTP_METHOD_POST == r->http_method
1181             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1182             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1183         }
1184     }
1185     else {
1186         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1187         if (-1 == r->reqbody_length
1188             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1189             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1190              * 3.3.3.  Message Body Length
1191              * [...]
1192              * If a message is received with both a Transfer-Encoding and a
1193              * Content-Length header field, the Transfer-Encoding overrides the
1194              * Content-Length.  Such a message might indicate an attempt to
1195              * perform request smuggling (Section 9.5) or response splitting
1196              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1197              * remove the received Content-Length field prior to forwarding such
1198              * a message downstream.
1199              */
1200             const unsigned int http_header_strict =
1201               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1202             if (http_header_strict) {
1203                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1204             }
1205             else {
1206                 /* ignore Content-Length */
1207                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1208             }
1209         }
1210         if (http_method_get_or_head(r->http_method)
1211             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1212             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1213         }
1214     }
1215 
1216     return 0;
1217 }
1218 
1219 
1220 static int
1221 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1222 {
1223     /*
1224      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1225      * Header : "^([-a-zA-Z]+): (.+)$"
1226      * End    : "^$"
1227      */
1228 
1229     int status;
1230     const unsigned int http_parseopts = r->conf.http_parseopts;
1231 
1232     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1233     if (0 != status) return status;
1234 
1235     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1236     if (0 != status) return status;
1237 
1238     return http_request_parse(r, scheme_port);
1239 }
1240 
1241 
1242 static void
1243 http_request_headers_fin (request_st * const restrict r)
1244 {
1245     if (0 == r->http_status) {
1246       #if 0
1247         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1248                                 | (1 << COMP_HTTP_SCHEME)
1249                                 | (1 << COMP_HTTP_HOST)
1250                                 | (1 << COMP_HTTP_REMOTE_IP)
1251                                 | (1 << COMP_HTTP_REQUEST_METHOD)
1252                                 | (1 << COMP_HTTP_URL)
1253                                 | (1 << COMP_HTTP_QUERY_STRING)
1254                                 | (1 << COMP_HTTP_REQUEST_HEADER);
1255       #else
1256         /* all config conditions are valid after parsing header
1257          * (set all bits; remove dependency on plugin_config.h) */
1258         r->conditional_is_valid = ~0u;
1259       #endif
1260     }
1261     else {
1262         r->keep_alive = 0;
1263         r->reqbody_length = 0;
1264     }
1265 }
1266 
1267 
1268 void
1269 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1270 {
1271     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1272 
1273     http_request_headers_fin(r);
1274 
1275     if (__builtin_expect( (0 != r->http_status), 0)) {
1276         if (r->conf.log_request_header_on_error) {
1277             /*(http_request_parse_headers() modifies hdrs only to
1278              * undo line-wrapping in-place using spaces)*/
1279             log_error_multiline(r->conf.errh, __FILE__, __LINE__,
1280                                 hdrs, r->rqst_header_len, "rqst: ");
1281         }
1282     }
1283 }
1284 
1285 
1286 void
1287 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1288 {
1289     if (0 == r->http_status)
1290         r->http_status = http_request_parse(r, scheme_port);
1291 
1292     if (0 == r->http_status) {
1293         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1294             r->http_status = http_request_header_line_invalid(r, 400,
1295               "invalid Connection header with HTTP/2 -> 400");
1296     }
1297 
1298     http_request_headers_fin(r);
1299 
1300     /* limited; headers not collected into a single buf for HTTP/2 */
1301     if (__builtin_expect( (0 != r->http_status), 0)) {
1302         if (r->conf.log_request_header_on_error) {
1303             log_error(r->conf.errh, __FILE__, __LINE__,
1304               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1305               r->http_host ? r->http_host->ptr : "",
1306               http_method_buf(r->http_method)->ptr,
1307               !buffer_is_blank(&r->target) ? r->target.ptr : "");
1308         }
1309     }
1310 
1311     /* ignore Upgrade if using HTTP/2 */
1312     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1313         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1314                                   CONST_STR_LEN("upgrade"));
1315     /* XXX: should filter out other hop-by-hop connection headers, too */
1316 }
1317