xref: /lighttpd1.4/src/request.c (revision 438dadde)
1 /*
2  * request - HTTP request processing
3  *
4  * Fully-rewritten from original
5  * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
6  * License: BSD 3-clause (same as lighttpd)
7  */
8 #include "first.h"
9 
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16 
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
/* Strict scan of a request URI for disallowed bytes.
 * Rejected bytes: anything <= 32 (control chars and space), 127 (DEL),
 * and 255 (0xFF).
 * Returns pointer to the first rejected byte in s[0..len),
 * or NULL if every byte is acceptable. */
__attribute_noinline__
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) {
    const uint8_t * const end = s + len;
    for (const uint8_t *p = s; p < end; ++p) {
        const uint8_t c = *p;
        /*(rejected bytes are not expected in well-formed requests)*/
        if (__builtin_expect( (c <= 32 || c == 127 || c == 255), 0))
            return (const char *)p;
    }
    return NULL;
}
33 
/* Strict scan of a header line (value) for disallowed bytes.
 * Rejected bytes: control chars below 32 other than horizontal tab,
 * and 127 (DEL).  Bytes >= 128 are accepted.
 * Returns pointer to the first rejected byte in s[0..len),
 * or NULL if every byte is acceptable. */
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) {
    const uint8_t * const u = (const uint8_t *)s; /*(unsigned view of bytes)*/
    for (uint_fast32_t i = 0; i != len; ++i) {
        const uint8_t c = u[i];
        const int rejected = (c < 32 && c != '\t') || c == 127;
        if (__builtin_expect( (rejected), 0))
            return s+i;
    }
    return NULL;
}
45 
/* Minimal scan of a line: the only rejected byte is '\0'.
 * Returns pointer to the first '\0' found in s[0..len),
 * or NULL if there is none. */
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) {
    /*(memchr() finds the first matching byte, same as a forward scan)*/
    return (const char *)memchr(s, '\0', len);
}
54 
55 static int request_check_hostname(buffer * const host) {
56     /*
57      *       hostport      = host [ ":" port ]
58      *       host          = hostname | IPv4address | IPv6address
59      *       hostname      = *( domainlabel "." ) toplabel [ "." ]
60      *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
61      *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
62      *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
63      *       IPv6address   = "[" ... "]"
64      *       port          = *digit
65      */
66 
67     const char *h = host->ptr;
68 
69     if (*h != '[') {
70         uint32_t len = buffer_clen(host);
71         const char * const colon = memchr(h, ':', len);
72         uint32_t hlen = colon ? (uint32_t)(colon - h) : len;
73 
74         /* if hostname ends in ".", strip it */
75         if (__builtin_expect( (0 == hlen), 0)) return -1;
76         if (__builtin_expect( (h[hlen-1] == '.'), 0)) {
77             /* shift port info one left */
78             if (--hlen == 0) return -1;
79             --len;
80             if (NULL != colon)
81                 memmove(host->ptr+hlen, colon, len - hlen);
82             buffer_truncate(host, len);
83         }
84 
85         int label_len = 0;
86         int allnumeric = 1;
87         int numeric = 1;
88         int level = 0;
89         for (uint32_t i = 0; i < hlen; ++i) {
90             const int ch = h[i];
91             ++label_len;
92             if (light_isdigit(ch))
93                 continue;
94             else if ((light_isalpha(ch) || (ch == '-' && i != 0)))
95                 numeric = 0;
96             else if (ch == '.' && 1 != label_len && '-' != h[i+1]) {
97                 allnumeric &= numeric;
98                 numeric = 1;
99                 label_len = 0;
100                 ++level;
101             }
102             else
103                 return -1;
104         }
105         /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
106         if (0 == label_len || (numeric && (level != 3 || !allnumeric)))
107             return -1;
108 
109         h += hlen;
110     }
111     else {  /* IPv6 address */
112         /* check the address inside [...]; note: not fully validating */
113         /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
114         ++h; /* step past '[' */
115         int cnt = 0;
116         while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h;
117         /*(invalid char, too many ':', missing ']', or empty "[]")*/
118         if (*h != ']' || h - host->ptr == 1) return -1;
119         ++h; /* step past ']' */
120     }
121 
122     /* check numerical port, if present */
123     if (*h == ':') {
124         if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/
125             buffer_truncate(host, h - host->ptr);
126         do { ++h; } while (light_isdigit(*h));
127     }
128 
129     return (*h == '\0') ? 0 : -1;
130 }
131 
132 int http_request_host_normalize(buffer * const b, const int scheme_port) {
133     /*
134      * check for and canonicalize numeric IP address and portnum (optional)
135      * (IP address may be followed by ":portnum" (optional))
136      * - IPv6: "[...]"
137      * - IPv4: "x.x.x.x"
138      * - IPv4: 12345678   (32-bit decimal number)
139      * - IPv4: 012345678  (32-bit octal number)
140      * - IPv4: 0x12345678 (32-bit hex number)
141      *
142      * allow any chars (except ':' and '\0' and stray '[' or ']')
143      *   (other code may check chars more strictly or more pedantically)
144      * ':'  delimits (optional) port at end of string
145      * "[]" wraps IPv6 address literal
146      * '\0' should have been rejected earlier were it present
147      *
148      * any chars includes, but is not limited to:
149      * - allow '-' any where, even at beginning of word
150      *     (security caution: might be confused for cmd flag if passed to shell)
151      * - allow all-digit TLDs
152      *     (might be mistaken for IPv4 addr by inet_aton()
153      *      unless non-digits appear in subdomain)
154      */
155 
156     /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
157      * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
158      * Not using inet_pton() (when available) on IPv4 for similar reasons. */
159 
160     const char * const p = b->ptr;
161     const size_t blen = buffer_clen(b);
162     long port = 0;
163 
164     if (*p != '[') {
165         char * const colon = (char *)memchr(p, ':', blen);
166         if (colon) {
167             if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
168             if (colon[1] != '\0') {
169                 char *e;
170                 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
171                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
172                     /* valid port */
173                 } else {
174                     return -1;
175                 }
176             } /*(else ignore stray colon at string end)*/
177             buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
178         }
179 
180         if (light_isdigit(*p)) do {
181             /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
182             /* (check one-element cache of normalized IPv4 address string) */
183             static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
184             size_t n = colon ? (size_t)(colon - p) : blen;
185             sock_addr addr;
186             if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
187             if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
188                 sock_addr_inet_ntop_copy_buffer(b, &addr);
189                 n = buffer_clen(b);
190                 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
191             }
192         } while (0);
193     } else do { /* IPv6 addr */
194       #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
195 
196         /* (check one-element cache of normalized IPv4 address string) */
197         static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
198         sock_addr addr;
199         char *bracket = b->ptr+blen-1;
200         char *percent = strchr(b->ptr+1, '%');
201         size_t len;
202         int rc;
203         char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
204         if (blen <= 2) return -1; /*(invalid "[]")*/
205         if (*bracket != ']') {
206             bracket = (char *)memchr(b->ptr+1, ']', blen-1);
207             if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
208                return -1;
209             }
210             if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
211                 char *e;
212                 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
213                 if (0 < port && port <= USHRT_MAX && *e == '\0') {
214                     /* valid port */
215                 } else {
216                     return -1;
217                 }
218             }
219         }
220 
221         len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
222         if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
223             /* truncate after ']' and re-add normalized port, if needed */
224             buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
225             break;
226         }
227 
228         *bracket = '\0';/*(terminate IPv6 string)*/
229         if (percent) *percent = '\0'; /*(remove %interface from address)*/
230         rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
231         if (percent) *percent = '%'; /*(restore %interface)*/
232         *bracket = ']'; /*(restore bracket)*/
233         if (1 != rc) return -1;
234 
235         sock_addr_inet_ntop(&addr, buf, sizeof(buf));
236         len = strlen(buf);
237         if (percent) {
238             if (percent > bracket) return -1;
239             if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
240             if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
241             memcpy(buf+len, percent, (size_t)(bracket - percent));
242             len += (size_t)(bracket - percent);
243         }
244         buffer_truncate(b, 1); /* truncate after '[' */
245         buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));
246 
247       #else
248 
249         return -1;
250 
251       #endif
252     } while (0);
253 
254     if (0 != port && port != scheme_port) {
255         buffer_append_string_len(b, CONST_STR_LEN(":"));
256         buffer_append_int(b, (int)port);
257     }
258 
259     return 0;
260 }
261 
262 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
263     /* caller should lowercase, as is done in http_request_header_set_Host(),
264      * for consistency in case the value is used prior to calling policy func */
265     /*buffer_to_lower(b);*/
266     return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
267                ? 0 != request_check_hostname(b)
268                : NULL != http_request_check_line_minimal(BUF_PTR_LEN(b)))
269             || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
270                 && 0 != http_request_host_normalize(b, scheme_port)));
271 }
272 
273 __attribute_cold__
274 __attribute_noinline__
275 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
276     if (r->conf.log_request_header_on_error) {
277         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
278     }
279     return status;
280 }
281 
282 __attribute_cold__
283 __attribute_noinline__
284 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
285     if (r->conf.log_request_header_on_error) {
286         if ((unsigned char)ch > 32 && ch != 127) {
287             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
288         }
289         else {
290             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
291         }
292     }
293     return 400;
294 }
295 
296 
297 __attribute_noinline__
298 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
299 {
300     r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
301                                                CONST_STR_LEN("Host"));
302     buffer_copy_string_len_lc(r->http_host, h, hlen);
303 }
304 
305 
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    /* Restricted base 10 parse of at most vlen chars; only digits 0-9 are
     * consumed (no sign, no whitespace), so negative numbers are rejected.
     * Parsing stops at the first non-digit or where the value would exceed
     * INT64_MAX.
     * *err is set to the position where parsing stopped; errno is not set.
     * The parse succeeded over the entire input iff *err == v+vlen.
     *(caller must check 0 == vlen if that is to be an error for caller)
     * Returns the value accumulated up to the stopping point. */
    const uint8_t * const s = (const uint8_t *)v;
    const int64_t mul_limit = INT64_MAX/10; /* largest value safe to *= 10 */
    int64_t acc = 0;
    uint32_t n = 0;
    while (n < vlen) {
        /*(unsigned; underflow ok: chars below '0' wrap to values > 9)*/
        const uint8_t digit = (uint8_t)(s[n] - '0');
        if (digit > 9 || acc > mul_limit) break;
        acc *= 10;
        if (acc > INT64_MAX - digit) break; /*(adding digit would overflow)*/
        acc += digit;
        ++n;
    }
    *err = v+n;
    return acc;
}
326 
327 
328 __attribute_cold__
329 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
330     /* Proxies sometimes send dup headers
331      * if they are the same we ignore the second
332      * if not, we raise an error */
333     const buffer * const vb = http_header_request_get(r, id, k, klen);
334     if (vb && buffer_eq_icase_slen(vb, v, vlen))
335         return 0; /* ignore header; matches existing header */
336 
337     const char *errmsg;
338     switch (id) {
339       case HTTP_HEADER_HOST:
340         errmsg = "duplicate Host header -> 400";
341         break;
342       case HTTP_HEADER_CONTENT_TYPE:
343         errmsg = "duplicate Content-Type header -> 400";
344         break;
345       case HTTP_HEADER_IF_MODIFIED_SINCE:
346         errmsg = "duplicate If-Modified-Since header -> 400";
347         break;
348       default:
349         errmsg = "duplicate header -> 400";
350         break;
351     }
352     return http_request_header_line_invalid(r, 400, errmsg);
353 }
354 
355 
356 /* add header to list of headers
357  * certain headers are also parsed
358  * might drop a header if deemed unnecessary/broken
359  *
360  * returns 0 on success, HTTP status on error
361  */
362 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
363     /*
364      * Note: k might not be '\0'-terminated
365      * Note: v is not '\0'-terminated
366      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
367      *     (one of '\r' '\n' ' ' '\t')
368      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
369      *     (care must be taken to avoid libc funcs which expect z-strings)
370      */
371     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
372 
373     switch (id) {
374       /*case HTTP_HEADER_OTHER:*/
375       default:
376         break;
377       case HTTP_HEADER_HOST:
378         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
379             if (vlen >= 1024) { /*(expecting < 256)*/
380                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
381             }
382             /*(http_request_header_append() plus sets r->http_host)*/
383             http_request_header_set_Host(r, v, vlen);
384             return 0;
385         }
386         else if (NULL != r->http_host
387                  && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
388             /* ignore all Host: headers if match authority in request line */
389             /* (expect Host to match case in :authority of HTTP/2 request) */
390             return 0; /* ignore header */
391         }
392         else {
393             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
394         }
395         break;
396       case HTTP_HEADER_CONNECTION:
397         /* "Connection: close" is common case if header is present */
398         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
399             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
400             r->keep_alive = 0;
401             break;
402         }
403         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
404             r->keep_alive = 1;
405             break;
406         }
407         break;
408       case HTTP_HEADER_CONTENT_TYPE:
409         if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) {
410             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
411         }
412         break;
413       case HTTP_HEADER_IF_NONE_MATCH:
414         /* if dup, only the first one will survive */
415         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) {
416             return 0; /* ignore header */
417         }
418         break;
419       case HTTP_HEADER_CONTENT_LENGTH:
420         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
421             /*(trailing whitespace was removed from vlen)*/
422             /*(not using strtoll() since v might not be z-string)*/
423             const char *err;
424             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
425             if (err == v+vlen) {
426                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
427                 if (0 == r->reqbody_length) r->reqbody_length = clen;
428             }
429             else {
430                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
431             }
432         }
433         else {
434             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
435         }
436         break;
437       case HTTP_HEADER_HTTP2_SETTINGS:
438         if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) {
439             return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400");
440         }
441         break;
442       case HTTP_HEADER_IF_MODIFIED_SINCE:
443         if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) {
444             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
445         }
446         break;
447       case HTTP_HEADER_TRANSFER_ENCODING:
448         if (HTTP_VERSION_1_1 != r->http_version) {
449             return http_request_header_line_invalid(r, 400,
450               HTTP_VERSION_1_0 == r->http_version
451                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
452                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
453         }
454 
455         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
456             /* Transfer-Encoding might contain additional encodings,
457              * which are not currently supported by lighttpd */
458             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
459         }
460         r->reqbody_length = -1;
461 
462         /* Transfer-Encoding is a hop-by-hop header,
463          * which must not be blindly forwarded to backends */
464         return 0; /* skip header */
465     }
466 
467     http_header_request_append(r, id, k, klen, v, vlen);
468     return 0;
469 }
470 
471 __attribute_cold__
472 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
473     const char * proto = memchr(ptr, ' ', len);
474     if (NULL == proto)
475         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
476     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
477     if (NULL == proto)
478         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
479     ++proto;
480 
481     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
482         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
483             /* length already checked before calling this routine */
484             /* (len != (size_t)(proto - ptr + 8)) */
485             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
486                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
487             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
488         }
489         else
490             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
491     }
492     else
493         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
494 
495     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
496     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
497 
498     return 0;
499 }
500 
501 __attribute_cold__
502 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
503     const char *nuri;
504     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
505         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
506        ||
507        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
508         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
509         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
510         const size_t hostlen = nuri - host;
511         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
512             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
513             return NULL;
514         }
515         /* Insert as "Host" header */
516         http_request_header_set_Host(r, host, hostlen);
517         return nuri;
518     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
519            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
520            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
521         /* (permitted) */
522         return uri;
523     } else {
524         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
525         return NULL;
526     }
527 }
528 
529 
530 __attribute_cold__
531 __attribute_noinline__
532 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
533 
534 
535 int
536 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
537 {
538     /* :method is required to indicate method
539      * CONNECT method must have :method and :authority
540      * All other methods must have at least :method :scheme :path */
541 
542     if (HTTP_METHOD_UNSET == r->http_method)
543         return http_request_header_line_invalid(r, 400,
544           "missing pseudo-header method -> 400");
545 
546     if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) {
547         if (!scheme)
548             return http_request_header_line_invalid(r, 400,
549               "missing pseudo-header scheme -> 400");
550 
551         if (buffer_is_blank(&r->target))
552             return http_request_header_line_invalid(r, 400,
553               "missing pseudo-header path -> 400");
554 
555         const char * const uri = r->target.ptr;
556         if (*uri != '/') { /* (common case: (*uri == '/')) */
557             if (uri[0] != '*' || uri[1] != '\0'
558                 || HTTP_METHOD_OPTIONS != r->http_method)
559                 return http_request_header_line_invalid(r, 400,
560                   "invalid pseudo-header path -> 400");
561         }
562     }
563     else { /* HTTP_METHOD_CONNECT */
564         if (NULL == r->http_host)
565             return http_request_header_line_invalid(r, 400,
566               "missing pseudo-header authority -> 400");
567         if (!buffer_is_blank(&r->target) || scheme)
568             return http_request_header_line_invalid(r, 400,
569               "invalid pseudo-header with CONNECT -> 400");
570         /* note: this copy occurs prior to http_request_host_policy()
571          * so any consumer handling CONNECT should normalize r->target
572          * as appropriate */
573         buffer_copy_buffer(&r->target, r->http_host);
574     }
575     buffer_copy_buffer(&r->target_orig, &r->target);
576 
577     /* r->http_host, if set, is checked with http_request_host_policy()
578      * in http_request_parse() */
579 
580     /* copied and modified from end of http_request_parse_reqline() */
581 
582     /* check uri for invalid characters */
583     const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/
584     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
585       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
586           ? NULL /* URI will be checked in http_request_parse_target() */
587           : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len)
588       : http_request_check_line_minimal(r->target.ptr, len);
589     return (NULL == x)
590       ? 0
591       : http_request_header_char_invalid(r, *x,
592           "invalid character in URI -> 400");
593 }
594 
595 
596 int
597 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
598 {
599     /* Note: k and v might not be '\0' terminated strings;
600      * care must be taken to avoid libc funcs which expect z-strings */
601     const char * const restrict k = hpctx->k;
602     const char * const restrict v = hpctx->v;
603     const uint32_t klen = hpctx->klen;
604     const uint32_t vlen = hpctx->vlen;
605 
606     if (0 == klen)
607         return http_request_header_line_invalid(r, 400,
608           "invalid header key -> 400");
609 
610     if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
611         /*(configurable with server.max-request-field-size; default 8k)*/
612       #if 1 /* emit to error log for people sending large headers */
613         log_error(r->conf.errh, __FILE__, __LINE__,
614                   "oversized request header -> 431");
615         return 431; /* Request Header Fields Too Large */
616       #else
617         /* 431 Request Header Fields Too Large */
618         return http_request_header_line_invalid(r, 431,
619           "oversized request header -> 431");
620       #endif
621     }
622 
623     if (!hpctx->trailers) {
624         if (*k == ':') {
625             /* HTTP/2 request pseudo-header fields */
626             if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
627                 return http_request_header_line_invalid(r, 400,
628                   "invalid pseudo-header -> 400");
629             if (0 == vlen)
630                 return http_request_header_line_invalid(r, 400,
631                   "invalid header value -> 400");
632 
633             /* (note: relies on implementation details using ls-hpack in h2.c)
634              * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
635              *  matches key, not also value, if lsxpack_header_t flags does not
636              *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
637              *  below indicates any method, not only "GET") */
638             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
639                 switch (klen-1) {
640                   case 4:
641                     if (0 == memcmp(k+1, "path", 4))
642                         hpctx->id = HTTP_HEADER_H2_PATH;
643                     break;
644                   case 6:
645                     if (0 == memcmp(k+1, "method", 6))
646                         hpctx->id = HTTP_HEADER_H2_METHOD_GET;
647                     else if (0 == memcmp(k+1, "scheme", 6))
648                         hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
649                     break;
650                   case 9:
651                     if (0 == memcmp(k+1, "authority", 9))
652                         hpctx->id = HTTP_HEADER_H2_AUTHORITY;
653                     break;
654                   default:
655                     break;
656                 }
657                 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
658                     return http_request_header_line_invalid(r, 400,
659                       "invalid pseudo-header -> 400");
660             }
661 
662             switch (hpctx->id) {
663               case HTTP_HEADER_H2_AUTHORITY:
664                 if (__builtin_expect( (r->http_host != NULL), 0))
665                     break;
666                 if (vlen >= 1024) /*(expecting < 256)*/
667                     return http_request_header_line_invalid(r, 400,
668                       "invalid pseudo-header authority too long -> 400");
669                 /* insert as "Host" header */
670                 http_request_header_set_Host(r, v, vlen);
671                 return 0;
672               case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
673               case HTTP_HEADER_H2_METHOD_POST:
674                 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
675                     break;
676                 r->http_method = get_http_method_key(v, vlen);
677                 if (HTTP_METHOD_UNSET >= r->http_method)
678                     return http_request_header_line_invalid(r, 501,
679                       "unknown http-method -> 501");
680                 return 0;
681               case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
682               case HTTP_HEADER_H2_PATH_INDEX_HTML:
683                 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
684                     break;
685                 buffer_copy_string_len(&r->target, v, vlen);
686                 return 0;
687               case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
688               case HTTP_HEADER_H2_SCHEME_HTTPS:
689                 if (__builtin_expect( (hpctx->scheme), 0))
690                     break;
691                 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
692                 return 0;
693                #if 0
694                 switch (vlen) {/*(validated, but then ignored)*/
695                   case 5: /* "https" */
696                     if (v[4]!='s') break;
697                     __attribute_fallthrough__
698                   case 4: /* "http" */
699                     if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
700                         hpctx->scheme = 1;
701                         return 0;
702                     }
703                     break;
704                   default:
705                     break;
706                 }
707                 return http_request_header_line_invalid(r, 400,
708                   "unknown pseudo-header scheme -> 400");
709                #endif
710               default:
711                 return http_request_header_line_invalid(r, 400,
712                   "invalid pseudo-header -> 400");
713             }
714             return http_request_header_line_invalid(r, 400,
715               "repeated pseudo-header -> 400");
716         }
717         else { /*(non-pseudo headers)*/
718             if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
719                 hpctx->pseudo = 0;
720                 int status =
721                   http_request_validate_pseudohdrs(r, hpctx->scheme,
722                                                    hpctx->http_parseopts);
723                 if (0 != status) return status;
724             }
725             if (0 == vlen)
726                 return 0;
727 
728             const unsigned int http_header_strict =
729               (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
730 
731             const char * const x = (http_header_strict)
732               ? http_request_check_line_strict(v, vlen)
733               : http_request_check_line_minimal(v, vlen);
734             if (x)
735                 return http_request_header_char_invalid(r, *x,
736                   "invalid character in header -> 400");
737 
738             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
739                 uint32_t j = 0;
740                 while (j < klen && (light_islower(k[j]) || k[j] == '-'))
741                     ++j;
742 
743                 if (__builtin_expect( (j != klen), 0)) {
744                     if (light_isupper(k[j]))
745                         return 400;
746                     if (0 != http_request_parse_header_other(r, k+j, klen-j,
747                                                             http_header_strict))
748                         return 400;
749                 }
750 
751                 hpctx->id = http_header_hkey_get_lc(k, klen);
752             }
753 
754             const enum http_header_e id = (enum http_header_e)hpctx->id;
755 
756             if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
757                 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
758                 return http_request_header_line_invalid(r, 400,
759                   "invalid TE header value with HTTP/2 -> 400");
760 
761             return http_request_parse_single_header(r, id, k, klen, v, vlen);
762         }
763     }
764     else { /*(trailers)*/
765         if (*k == ':')
766             return http_request_header_line_invalid(r, 400,
767               "invalid pseudo-header in trailers -> 400");
768         /* ignore trailers (after required HPACK decoding) if streaming
769          * request body to backend since headers have already been sent
770          * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
771          * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
772          * support using HTTP/2 to connect to backends) */
773       #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
774         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
775             return 0;
776       #endif
777         /* Note: do not unconditionally merge into headers since if
778          * headers had already been sent to backend, then mod_accesslog
779          * logging of request headers might be inaccurate.
780          * Many simple backends do not support HTTP/1.1 requests sending
781          * Transfer-Encoding: chunked, and even those that do might not
782          * handle trailers.  Some backends do not even support HTTP/1.1.
783          * For all these reasons, ignore trailers if streaming request
784          * body to backend.  Revisit in future if adding support for
785          * connecting to backends using HTTP/2 (with explicit config
786          * option to force connecting to backends using HTTP/2) */
787 
788         /* XXX: TODO: request trailers not handled if streaming reqbody
789          * XXX: must ensure that trailers are not disallowed field-names
790          */
791 
792       #if 0
793         if (0 == vlen)
794             return 0;
795       #endif
796 
797         return 0;
798     }
799 }
800 
801 
802 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
803     size_t len = hoff[2];
804 
805     /* parse the first line of the request
806      * <method> <uri> <protocol>\r\n
807      * */
808     if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
809         return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
810     if (ptr[len-2] == '\r')
811         len-=2;
812     else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
813         len-=1;
814     else
815         return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
816 
817     /*
818      * RFC7230:
819      *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
820      *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
821      */
822 
823     /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
824     union proto_un {
825       char c[8];
826       uint64_t u;
827     };
828     static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
829     static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
830     const char *p = ptr + len - 8;
831     union proto_un proto8;
832     proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
833     proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
834     if (p[-1] == ' ' && http_1_1.u == proto8.u) {
835         r->http_version = HTTP_VERSION_1_1;
836         r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
837     }
838     else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
839         r->http_version = HTTP_VERSION_1_0;
840         r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
841     }
842     else {
843         int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
844         if (0 != status) return status;
845         /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
846         for (p = ptr + len - 9; p[-1] != ' '; --p) ;
847     }
848 
849     /* method is expected to be a short string in the general case */
850     size_t i = 0;
851     while (ptr[i] != ' ') ++i;
852   #if 0 /*(space must exist if protocol was parsed successfully)*/
853     while (i < len && ptr[i] != ' ') ++i;
854     if (ptr[i] != ' ')
855         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
856   #endif
857 
858     r->http_method = get_http_method_key(ptr, i);
859     if (HTTP_METHOD_UNSET >= r->http_method)
860         return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");
861 
862     const char *uri = ptr + i + 1;
863 
864     if (uri == p)
865         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
866     len = (size_t)(p - uri - 1);
867 
868     if (*uri != '/') { /* (common case: (*uri == '/')) */
869         uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
870         if (NULL == uri) return 400;
871         len = (size_t)(p - uri - 1);
872     }
873 
874     if (0 == len)
875         return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
876 
877     /* check uri for invalid characters */     /* http_header_strict */
878     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
879       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
880           ? NULL /* URI will be checked in http_request_parse_target() */
881           : http_request_check_uri_strict((const uint8_t *)uri, len)
882       : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */
883     if (x)
884         http_request_header_char_invalid(r, *x, "invalid character in URI -> 400");
885 
886     buffer_copy_string_len(&r->target, uri, len);
887     buffer_copy_string_len(&r->target_orig, uri, len);
888     return 0;
889 }
890 
891 int http_request_parse_target(request_st * const r, int scheme_port) {
892     /* URI is parsed into components at start of request and may
893      * also be re-parsed upon HANDLER_COMEBACK during the request
894      * r->target is expected to be a "/url-part?query-part"
895      *   (and *not* a fully-qualified URI starting https://...)
896      * r->uri.authority is expected to be parsed elsewhere into r->http_host
897      */
898 
899     /**
900      * prepare strings
901      *
902      * - uri.path
903      * - uri.query
904      *
905      */
906 
907     /**
908      * Name according to RFC 2396
909      *
910      * - scheme
911      * - authority
912      * - path
913      * - query
914      *
915      * (scheme)://(authority)(path)?(query)#fragment
916      *
917      */
918 
919     /* take initial scheme value from connection-level state
920      * (request r->uri.scheme can be overwritten for later,
921      *  for example by mod_extforward or mod_magnet) */
922     buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);
923 
924     buffer * const target = &r->target;
925     if (r->http_method == HTTP_METHOD_CONNECT
926         || (r->http_method == HTTP_METHOD_OPTIONS
927             && target->ptr[0] == '*'
928             && target->ptr[1] == '\0')) {
929         /* CONNECT ... (or) OPTIONS * ... */
930         buffer_copy_buffer(&r->uri.path, target);
931         buffer_clear(&r->uri.query);
932         return 0;
933     }
934 
935     char *qstr;
936     if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
937         /*uint32_t len = buffer_clen(target);*/
938         int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
939         if (-2 == qs)
940             return http_request_header_line_invalid(r, 400,
941               "invalid character in URI -> 400"); /* Bad Request */
942         qstr = (-1 == qs) ? NULL : target->ptr+qs;
943       #if 0  /* future: might enable here, or below for all requests */
944         /* (Note: total header size not recalculated on HANDLER_COMEBACK
945          *  even if other request headers changed during processing)
946          * (If (0 != r->loops_per_request), then the generated
947          *  request is too large.  Should a different error be returned?) */
948         r->rqst_header_len -= len;
949         len = buffer_clen(target);
950         r->rqst_header_len += len;
951         if (len > MAX_HTTP_REQUEST_URI) {
952             return 414; /* 414 URI Too Long */
953         }
954         if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
955             log_error(r->conf.errh, __FILE__, __LINE__,
956               "request header fields too large: %u -> 431",
957               r->rqst_header_len);
958             return 431; /* Request Header Fields Too Large */
959         }
960       #endif
961     }
962     else {
963         size_t rlen = buffer_clen(target);
964         qstr = memchr(target->ptr, '#', rlen);/* discard fragment */
965         if (qstr) {
966             rlen = (size_t)(qstr - target->ptr);
967             buffer_truncate(target, rlen);
968         }
969         qstr = memchr(target->ptr, '?', rlen);
970     }
971 
972     /** extract query string from target */
973     const char * const pstr = target->ptr;
974     const uint32_t rlen = buffer_clen(target);
975     uint32_t plen;
976     if (NULL != qstr) {
977         plen = (uint32_t)(qstr - pstr);
978         buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1);
979     }
980     else {
981         plen = rlen;
982         buffer_clear(&r->uri.query);
983     }
984     buffer_copy_string_len(&r->uri.path, pstr, plen);
985 
986     /* decode url to path
987      *
988      * - decode url-encodings  (e.g. %20 -> ' ')
989      * - remove path-modifiers (e.g. /../)
990      */
991 
992     buffer_urldecode_path(&r->uri.path);
993     buffer_path_simplify(&r->uri.path);
994     if (r->uri.path.ptr[0] != '/')
995         return http_request_header_line_invalid(r, 400,
996           "uri-path does not begin with '/' -> 400"); /* Bad Request */
997 
998     return 0;
999 }
1000 
1001 __attribute_cold__
1002 __attribute_noinline__
1003 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
1004     for (int i = 0; i < klen; ++i) {
1005         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1006         /**
1007          * 1*<any CHAR except CTLs or separators>
1008          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1009          *
1010          */
1011         switch(k[i]) {
1012         case ' ':
1013         case '\t':
1014             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1015         case '(':
1016         case ')':
1017         case '<':
1018         case '>':
1019         case '@':
1020         case ',':
1021         case ';':
1022         case '\\':
1023         case '\"':
1024         case '/':
1025         case '[':
1026         case ']':
1027         case '?':
1028         case '=':
1029         case '{':
1030         case '}':
1031             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1032         default:
1033             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1034                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1035             break; /* ok */
1036         }
1037     }
1038     return 0;
1039 }
1040 
1041 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
1042     const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1043 
1044   #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/
1045     int i = hoff[2];
1046 
1047     if (ptr[i] == ' ' || ptr[i] == '\t') {
1048         return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400");
1049     }
1050   #endif
1051 
1052     for (int i = 2; i < hoff[0]; ++i) {
1053         const char *k = ptr + hoff[i];
1054         /* one past last line hoff[hoff[0]] is to final "\r\n" */
1055         char *end = ptr + hoff[i+1];
1056 
1057         const char *colon = memchr(k, ':', end - k);
1058         if (NULL == colon)
1059             return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");
1060 
1061         const char *v = colon + 1;
1062 
1063         /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1064          * 3.2.4.  Field Parsing
1065          * [...]
1066          * No whitespace is allowed between the header field-name and colon.  In
1067          * the past, differences in the handling of such whitespace have led to
1068          * security vulnerabilities in request routing and response handling.  A
1069          * server MUST reject any received request message that contains
1070          * whitespace between a header field-name and colon with a response code
1071          * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
1072          * response message before forwarding the message downstream.
1073          */
1074         /* (line k[-1] is always preceded by a '\n',
1075          *  including first header after request-line,
1076          *  so no need to check colon != k) */
1077         if (colon[-1] == ' ' || colon[-1] == '\t') {
1078             if (http_header_strict) {
1079                 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
1080             }
1081             else {
1082                 /* remove trailing whitespace from key(if !http_header_strict)*/
1083                 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t');
1084             }
1085         }
1086 
1087         const int klen = (int)(colon - k);
1088         if (0 == klen)
1089             return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
1090         const enum http_header_e id = http_header_hkey_get(k, klen);
1091 
1092         if (id == HTTP_HEADER_OTHER) {
1093             for (int j = 0; j < klen; ++j) {
1094                 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/
1095                 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict))
1096                     return 400;
1097                 break;
1098             }
1099         }
1100 
1101         /* remove leading whitespace from value */
1102         while (*v == ' ' || *v == '\t') ++v;
1103 
1104         for (; i+1 <= hoff[0]; ++i) {
1105             end = ptr + hoff[i+1];
1106             if (end[0] != ' ' && end[0] != '\t') break;
1107 
1108             /* line folding */
1109           #ifdef __COVERITY__
1110             force_assert(end - k >= 2);
1111           #endif
1112             if (end[-2] == '\r')
1113                 end[-2] = ' ';
1114             else if (http_header_strict)
1115                 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1116             end[-1] = ' ';
1117         }
1118       #ifdef __COVERITY__
1119         /*(buf holding k has non-zero request-line, so end[-2] valid)*/
1120         force_assert(end >= k + 2);
1121       #endif
1122         if (end[-2] == '\r')
1123             --end;
1124         else if (http_header_strict)
1125             return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
1126         /* remove trailing whitespace from value (+ remove '\r\n') */
1127         /* (line k[-1] is always preceded by a '\n',
1128          *  including first header after request-line,
1129          *  so no need to check (end != k)) */
1130         do { --end; } while (end[-1] == ' ' || end[-1] == '\t');
1131 
1132         const int vlen = (int)(end - v);
1133         /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
1134         if (vlen <= 0) continue; /* ignore header */
1135 
1136         if (http_header_strict) {
1137             const char * const x = http_request_check_line_strict(v, vlen);
1138             if (x)
1139                 return http_request_header_char_invalid(r, *x,
1140                   "invalid character in header -> 400");
1141         } /* else URI already checked in http_request_parse_reqline() for any '\0' */
1142 
1143         int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen);
1144         if (0 != status) return status;
1145     }
1146 
1147     return 0;
1148 }
1149 
1150 
1151 static int
1152 http_request_parse (request_st * const restrict r, const int scheme_port)
1153 {
1154     int status = http_request_parse_target(r, scheme_port);
1155     if (0 != status) return status;
1156 
1157     /* post-processing */
1158     const unsigned int http_parseopts = r->conf.http_parseopts;
1159 
1160     /* check hostname field if it is set */
1161     /*(r->http_host might not be set until after parsing request headers)*/
1162     if (__builtin_expect( (r->http_host != NULL), 1)) {
1163         if (0 != http_request_host_policy(r->http_host,
1164                                           http_parseopts, scheme_port))
1165             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1166         buffer_copy_buffer(&r->uri.authority, r->http_host);
1167     }
1168     else {
1169         buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1170         if (r->http_version >= HTTP_VERSION_1_1)
1171             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1172     }
1173 
1174     if (HTTP_VERSION_1_1 != r->http_version
1175         && (r->rqst_htags
1176             & (light_bshift(HTTP_HEADER_UPGRADE)
1177               |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1178         return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1179     }
1180 
1181     if (0 == r->reqbody_length) {
1182         /* POST requires Content-Length (or Transfer-Encoding)
1183          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1184         if (HTTP_METHOD_POST == r->http_method
1185             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1186             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1187         }
1188     }
1189     else {
1190         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1191         if (-1 == r->reqbody_length
1192             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1193             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1194              * 3.3.3.  Message Body Length
1195              * [...]
1196              * If a message is received with both a Transfer-Encoding and a
1197              * Content-Length header field, the Transfer-Encoding overrides the
1198              * Content-Length.  Such a message might indicate an attempt to
1199              * perform request smuggling (Section 9.5) or response splitting
1200              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1201              * remove the received Content-Length field prior to forwarding such
1202              * a message downstream.
1203              */
1204             const unsigned int http_header_strict =
1205               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1206             if (http_header_strict) {
1207                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1208             }
1209             else {
1210                 /* ignore Content-Length */
1211                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1212             }
1213         }
1214         if (http_method_get_or_head(r->http_method)
1215             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1216             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1217         }
1218     }
1219 
1220     return 0;
1221 }
1222 
1223 
1224 static int
1225 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1226 {
1227     /*
1228      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1229      * Header : "^([-a-zA-Z]+): (.+)$"
1230      * End    : "^$"
1231      */
1232 
1233     int status;
1234     const unsigned int http_parseopts = r->conf.http_parseopts;
1235 
1236     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1237     if (0 != status) return status;
1238 
1239     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1240     if (0 != status) return status;
1241 
1242     return http_request_parse(r, scheme_port);
1243 }
1244 
1245 
1246 static void
1247 http_request_headers_fin (request_st * const restrict r)
1248 {
1249     if (0 == r->http_status) {
1250       #if 0
1251         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1252                                 | (1 << COMP_HTTP_SCHEME)
1253                                 | (1 << COMP_HTTP_HOST)
1254                                 | (1 << COMP_HTTP_REMOTE_IP)
1255                                 | (1 << COMP_HTTP_REQUEST_METHOD)
1256                                 | (1 << COMP_HTTP_URL)
1257                                 | (1 << COMP_HTTP_QUERY_STRING)
1258                                 | (1 << COMP_HTTP_REQUEST_HEADER);
1259       #else
1260         /* all config conditions are valid after parsing header
1261          * (set all bits; remove dependency on plugin_config.h) */
1262         r->conditional_is_valid = ~0u;
1263       #endif
1264     }
1265     else {
1266         r->keep_alive = 0;
1267         r->reqbody_length = 0;
1268     }
1269 }
1270 
1271 
1272 void
1273 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1274 {
1275     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1276 
1277     http_request_headers_fin(r);
1278 
1279     if (__builtin_expect( (0 != r->http_status), 0)) {
1280         if (r->conf.log_request_header_on_error) {
1281             /*(http_request_parse_headers() modifies hdrs only to
1282              * undo line-wrapping in-place using spaces)*/
1283             log_error_multiline(r->conf.errh, __FILE__, __LINE__,
1284                                 hdrs, r->rqst_header_len, "rqst: ");
1285         }
1286     }
1287 }
1288 
1289 
1290 void
1291 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1292 {
1293     if (0 == r->http_status)
1294         r->http_status = http_request_parse(r, scheme_port);
1295 
1296     if (0 == r->http_status) {
1297         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1298             r->http_status = http_request_header_line_invalid(r, 400,
1299               "invalid Connection header with HTTP/2 -> 400");
1300     }
1301 
1302     http_request_headers_fin(r);
1303 
1304     /* limited; headers not collected into a single buf for HTTP/2 */
1305     if (__builtin_expect( (0 != r->http_status), 0)) {
1306         if (r->conf.log_request_header_on_error) {
1307             log_error(r->conf.errh, __FILE__, __LINE__,
1308               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1309               r->http_host ? r->http_host->ptr : "",
1310               http_method_buf(r->http_method)->ptr,
1311               !buffer_is_blank(&r->target) ? r->target.ptr : "");
1312         }
1313     }
1314 
1315     /* ignore Upgrade if using HTTP/2 */
1316     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1317         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1318                                   CONST_STR_LEN("upgrade"));
1319     /* XXX: should filter out other hop-by-hop connection headers, too */
1320 }
1321