xref: /lighttpd1.4/src/request.c (revision f2610d23)
1 /*
2  * request - HTTP request processing
3  *
4  * Fully-rewritten from original
5  * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
6  * License: BSD 3-clause (same as lighttpd)
7  */
8 #include "first.h"
9 
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16 
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 
23 __attribute_cold__
24 __attribute_noinline__
25 void
http_request_state_append(buffer * const b,request_state_t state)26 http_request_state_append (buffer * const b, request_state_t state)
27 {
28     static const struct sn { const char *s; uint32_t n; } states[] = {
29       { CONST_STR_LEN("connect") }
30      ,{ CONST_STR_LEN("req-start") }
31      ,{ CONST_STR_LEN("read") }
32      ,{ CONST_STR_LEN("req-end") }
33      ,{ CONST_STR_LEN("readpost") }
34      ,{ CONST_STR_LEN("handle-req") }
35      ,{ CONST_STR_LEN("resp-start") }
36      ,{ CONST_STR_LEN("write") }
37      ,{ CONST_STR_LEN("resp-end") }
38      ,{ CONST_STR_LEN("error") }
39      ,{ CONST_STR_LEN("close") }
40      ,{ CONST_STR_LEN("(unknown)") }
41     };
42     const struct sn * const p =
43       states +((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
44     buffer_append_string_len(b, p->s, p->n);
45 }
46 
47 __attribute_cold__
48 __attribute_noinline__
49 __attribute_pure__
50 const char *
http_request_state_short(request_state_t state)51 http_request_state_short (request_state_t state)
52 {
53     /*((char *) returned, but caller must use only one char)*/
54     static const char sstates[] = ".qrQRhsWSECx";
55     return
56       sstates+((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
57 }
58 
59 
__attribute_noinline__
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) {
    /* Return a pointer to the first byte in s[0..len) whose value is
     * <= 32 (control chars and space), 127, or 255; NULL if there is none. */
    const uint8_t * const end = s + len;
    for (const uint8_t *p = s; p != end; ++p) {
        const uint8_t c = *p;
        if (__builtin_expect( (c <= 32 || c == 127 || c == 255), 0))
            return (const char *)p;
    }
    return NULL;
}
71 
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) {
    /* Return a pointer to the first byte in s[0..len) that is a control
     * character other than horizontal tab (value < 32 and not '\t'), or is
     * 127; NULL if there is none. */
    const char * const end = s + len;
    for (const char *p = s; p != end; ++p) {
        const uint8_t c = *(const uint8_t *)p;
        if (__builtin_expect( (c < 32), 0) && c != '\t')
            return p;
        if (__builtin_expect( (c == 127), 0))
            return p;
    }
    return NULL;
}
83 
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) {
    /* Return a pointer to the first NUL, CR, or LF in s[0..len);
     * NULL if none of those bytes is present. */
    for (uint_fast32_t i = 0; i < len; ++i) {
        switch (s[i]) {
          case '\0':
          case '\r':
          case '\n':
            return s+i;
          default:
            break;
        }
    }
    return NULL;
}
94 
static int request_check_hostname(buffer * const host) {
    /*
     * Strict syntax check of a "host[:port]" string; returns 0 if accepted,
     * -1 if rejected.  May modify host in place: a trailing '.' on the
     * hostname and a trailing ':' with no port digits are removed.
     *
     *       hostport      = host [ ":" port ]
     *       host          = hostname | IPv4address | IPv6address
     *       hostname      = *( domainlabel "." ) toplabel [ "." ]
     *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
     *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
     *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
     *       IPv6address   = "[" ... "]"
     *       port          = *digit
     */

    const char *cur = host->ptr;

    if (*cur == '[') {  /* IPv6 address */
        /* check the address inside [...]; note: not fully validating */
        /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
        ++cur; /* step past '[' */
        int ncolons = 0;
        for (;;) {
            const char c = *cur;
            if (light_isxdigit(c) || c == '.') {
                ++cur;
                continue;
            }
            /* (colon counter is bumped only when a ':' is actually seen) */
            if (c == ':' && ++ncolons < 8) {
                ++cur;
                continue;
            }
            break;
        }
        /*(invalid char, too many ':', missing ']', or empty "[]")*/
        if (*cur != ']' || cur - host->ptr == 1) return -1;
        ++cur; /* step past ']' */
    }
    else {  /* hostname or IPv4 address */
        uint32_t total = buffer_clen(host);
        const char * const colon = memchr(cur, ':', total);
        uint32_t namelen = colon ? (uint32_t)(colon - cur) : total;

        if (__builtin_expect( (0 == namelen), 0)) return -1;

        /* if hostname ends in ".", strip it */
        if (__builtin_expect( (cur[namelen-1] == '.'), 0)) {
            if (--namelen == 0) return -1;
            --total;
            /* shift port info one left */
            if (NULL != colon)
                memmove(host->ptr+namelen, colon, total - namelen);
            buffer_truncate(host, total);
        }

        int label_len = 0;   /* chars consumed in current label */
        int numeric = 1;     /* current label is all digits so far */
        int allnumeric = 1;  /* every completed label was all digits */
        int level = 0;       /* number of '.' label separators seen */
        for (uint32_t i = 0; i < namelen; ++i) {
            const int ch = cur[i];
            ++label_len;
            if (light_isdigit(ch)) {
                /* digits leave the label classification unchanged */
            }
            else if (light_isalpha(ch) || (ch == '-' && i != 0)) {
                numeric = 0;
            }
            else if (ch == '.' && label_len != 1 && cur[i+1] != '-') {
                /* label boundary: label non-empty, next label not '-' led */
                if (!numeric) allnumeric = 0;
                numeric = 1;
                label_len = 0;
                ++level;
            }
            else {
                return -1;
            }
        }

        /* last label must not be empty */
        if (0 == label_len) return -1;
        /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
        if (numeric && !(level == 3 && allnumeric)) return -1;

        cur += namelen;
    }

    /* check numerical port, if present */
    if (*cur == ':') {
        if (__builtin_expect( (cur[1] == '\0'), 0)) /*(remove trailing colon)*/
            buffer_truncate(host, cur - host->ptr);
        ++cur;
        while (light_isdigit(*cur)) ++cur;
    }

    return (*cur != '\0') ? -1 : 0;
}
171 
int http_request_host_normalize(buffer * const b, const int scheme_port) {
    /*
     * Canonicalize, in place, a numeric IP address and optional port in b.
     * Returns 0 on success (b possibly rewritten), -1 if b is rejected.
     * A port is kept in the result only if it is non-zero and differs from
     * scheme_port.
     *
     * check for and canonicalize numeric IP address and portnum (optional)
     * (IP address may be followed by ":portnum" (optional))
     * - IPv6: "[...]"
     * - IPv4: "x.x.x.x"
     * - IPv4: 12345678   (32-bit decimal number)
     * - IPv4: 012345678  (32-bit octal number)
     * - IPv4: 0x12345678 (32-bit hex number)
     *
     * allow any chars (except ':' and '\0' and stray '[' or ']')
     *   (other code may check chars more strictly or more pedantically)
     * ':'  delimits (optional) port at end of string
     * "[]" wraps IPv6 address literal
     * '\0' should have been rejected earlier were it present
     *
     * any chars includes, but is not limited to:
     * - allow '-' any where, even at beginning of word
     *     (security caution: might be confused for cmd flag if passed to shell)
     * - allow all-digit TLDs
     *     (might be mistaken for IPv4 addr by inet_aton()
     *      unless non-digits appear in subdomain)
     */

    /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
     * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
     * Not using inet_pton() (when available) on IPv4 for similar reasons. */

    const char * const p = b->ptr;
    const size_t blen = buffer_clen(b);
    long port = 0;

    if (*p != '[') {
        char * const colon = (char *)memchr(p, ':', blen);
        if (colon) {
            if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
            if (colon[1] != '\0') {
                char *e;
                port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
                if (port <= 0 || port > USHRT_MAX || *e != '\0')
                    return -1; /* invalid port */
            } /*(else ignore stray colon at string end)*/
            buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
        }

        if (light_isdigit(*p)) do {
            /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
            /* (check one-element cache of normalized IPv4 address string) */
            static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
            size_t n = colon ? (size_t)(colon - p) : blen;
            sock_addr addr;
            /* (n >= 1 here since *p is a digit, so an empty cache never hits)*/
            if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
            if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
                /* replace host with normalized form and remember it */
                sock_addr_inet_ntop_copy_buffer(b, &addr);
                n = buffer_clen(b);
                if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
            }
            /*(else not an IPv4 literal; leave host string as-is)*/
        } while (0);
    } else do { /* IPv6 addr */
      #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)

        /* (check one-element cache of normalized IPv6 address string) */
        static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
        sock_addr addr;
        char *bracket = b->ptr+blen-1;
        char *percent = strchr(b->ptr+1, '%');
        size_t len;
        int rc;
        char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
        if (blen <= 2) return -1; /*(invalid "[]")*/
        if (*bracket != ']') {
            /* string does not end in ']'; expect "]:port" or "]:" suffix */
            bracket = (char *)memchr(b->ptr+1, ']', blen-1);
            if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
               return -1;
            }
            if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
                char *e;
                port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
                if (port <= 0 || port > USHRT_MAX || *e != '\0')
                    return -1; /* invalid port */
            }
        }

        /* length of address portion (excludes any %interface suffix) */
        len = (size_t)((percent ? percent : bracket) - (b->ptr+1));

        /* Reject empty address (e.g. "[%eth0]").  Without this check, a
         * zero-length address compares equal to the initially empty cache
         * (laddr.n == 0) below and would be accepted without validation. */
        if (0 == len) return -1;

        if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
            /* truncate after ']' and re-add normalized port, if needed */
            buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
            break;
        }

        /* temporarily NUL-terminate the address in place for parsing */
        *bracket = '\0';/*(terminate IPv6 string)*/
        if (percent) *percent = '\0'; /*(remove %interface from address)*/
        rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
        if (percent) *percent = '%'; /*(restore %interface)*/
        *bracket = ']'; /*(restore bracket)*/
        if (1 != rc) return -1;

        sock_addr_inet_ntop(&addr, buf, sizeof(buf));
        len = strlen(buf);
        if (percent) {
            if (percent > bracket) return -1;
            if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
            /* NOTE(review): the cache is refreshed only when a %interface
             * suffix is present; unclear whether that is intentional or
             * whether this line was meant to sit before the if -- confirm */
            if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
            /* re-attach "%interface" after the normalized address */
            memcpy(buf+len, percent, (size_t)(bracket - percent));
            len += (size_t)(bracket - percent);
        }
        buffer_truncate(b, 1); /* truncate after '[' */
        buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));

      #else

        return -1;

      #endif
    } while (0);

    /* re-append port only if given and not the default for the scheme */
    if (0 != port && port != scheme_port) {
        buffer_append_char(b, ':');
        buffer_append_int(b, (int)port);
    }

    return 0;
}
301 
int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
    /* Apply the configured host checks to b.
     * Returns 0 if b passes, 1 if b is rejected.
     *
     * caller should lowercase, as is done in http_request_header_set_Host(),
     * for consistency in case the value is used prior to calling policy func
     * (i.e. buffer_to_lower(b) is intentionally not done here) */

    /* step 1: syntax check, strict or minimal depending on parse options */
    const int rejected = (http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
      ? (0 != request_check_hostname(b))
      : (NULL != http_request_check_line_minimal(BUF_PTR_LEN(b)));
    if (rejected)
        return 1;

    /* step 2: optional normalization (may rewrite b) */
    if (!(http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE))
        return 0;

    return (0 != http_request_host_normalize(b, scheme_port));
}
312 
313 __attribute_cold__
314 __attribute_noinline__
http_request_header_line_invalid(request_st * const restrict r,const int status,const char * const restrict msg)315 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
316     if (r->conf.log_request_header_on_error) {
317         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
318     }
319     return status;
320 }
321 
322 __attribute_cold__
323 __attribute_noinline__
http_request_header_char_invalid(request_st * const restrict r,const char ch,const char * const restrict msg)324 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
325     if (r->conf.log_request_header_on_error) {
326         if ((unsigned char)ch > 32 && ch != 127) {
327             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
328         }
329         else {
330             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
331         }
332     }
333     return 400;
334 }
335 
336 
337 __attribute_noinline__
http_request_header_set_Host(request_st * const restrict r,const char * const h,size_t hlen)338 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
339 {
340     r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
341                                                CONST_STR_LEN("Host"));
342     buffer_copy_string_len_lc(r->http_host, h, hlen);
343 }
344 
345 
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    /* Base-10 parse of exactly vlen chars, accepting only digits 0-9.
     * Negative numbers are rejected (the '-' is not a digit) and values
     * greater than INT64_MAX are treated as an error.
     * errno is not set: the caller detects an error by *err != v+vlen.
     * An empty input (0 == vlen) yields 0 with *err == v; the caller must
     * check vlen itself if empty input is to be an error. */
    const uint8_t *p = (const uint8_t *)v;
    const uint8_t * const end = p + vlen;
    int64_t acc = 0;

    while (p != end) {
        /*(unsigned subtraction; chars below '0' wrap to values > 9)*/
        const uint8_t digit = (uint8_t)(*p - '0');
        if (digit > 9 || acc > INT64_MAX/10)
            break;
        acc *= 10;
        if (acc > INT64_MAX - digit)
            break;
        acc += digit;
        ++p;
    }

    *err = (const char *)p; /* first char not consumed */
    return acc;
}
366 
367 
368 __attribute_cold__
http_request_parse_duplicate(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)369 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
370     /* Proxies sometimes send dup headers
371      * if they are the same we ignore the second
372      * if not, we raise an error */
373     const buffer * const vb = http_header_request_get(r, id, k, klen);
374     if (vb && buffer_eq_icase_slen(vb, v, vlen))
375         return 0; /* ignore header; matches existing header */
376 
377     const char *errmsg;
378     switch (id) {
379       case HTTP_HEADER_HOST:
380         errmsg = "duplicate Host header -> 400";
381         break;
382       case HTTP_HEADER_CONTENT_TYPE:
383         errmsg = "duplicate Content-Type header -> 400";
384         break;
385       case HTTP_HEADER_IF_MODIFIED_SINCE:
386         errmsg = "duplicate If-Modified-Since header -> 400";
387         break;
388       case HTTP_HEADER_HTTP2_SETTINGS:
389         errmsg = "duplicate HTTP2-Settings header -> 400";
390         break;
391       default:
392         errmsg = "duplicate header -> 400";
393         break;
394       case HTTP_HEADER_IF_NONE_MATCH:
395         /* if dup, only the first one will survive */
396         return 0; /* ignore header */
397     }
398     return http_request_header_line_invalid(r, 400, errmsg);
399 }
400 
401 
402 /* add header to list of headers
403  * certain headers are also parsed
404  * might drop a header if deemed unnecessary/broken
405  *
406  * returns 0 on success, HTTP status on error
407  */
http_request_parse_single_header(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)408 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
409     /*
410      * Note: k might not be '\0'-terminated
411      * Note: v is not '\0'-terminated
412      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
413      *     (one of '\r' '\n' ' ' '\t')
414      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
415      *     (care must be taken to avoid libc funcs which expect z-strings)
416      */
417     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
418 
419     switch (id) {
420       /*case HTTP_HEADER_OTHER:*/
421       default:
422         break;
423       case HTTP_HEADER_HOST:
424         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
425             if (vlen >= 1024) { /*(expecting < 256)*/
426                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
427             }
428             /*(http_request_header_append() plus sets r->http_host)*/
429             http_request_header_set_Host(r, v, vlen);
430             return 0;
431         }
432         else if (NULL != r->http_host
433                  && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
434             /* ignore all Host: headers if match authority in request line */
435             /* (expect Host to match case in :authority of HTTP/2 request) */
436             return 0; /* ignore header */
437         }
438         /* else parse duplicate for match or error */
439         __attribute_fallthrough__
440       case HTTP_HEADER_IF_MODIFIED_SINCE:
441       case HTTP_HEADER_IF_NONE_MATCH:
442       case HTTP_HEADER_CONTENT_TYPE:
443       case HTTP_HEADER_HTTP2_SETTINGS:
444         if (light_btst(r->rqst_htags, id))
445             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
446         break;
447       case HTTP_HEADER_CONNECTION:
448         /* "Connection: close" is common case if header is present */
449         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
450             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
451             r->keep_alive = 0;
452             break;
453         }
454         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
455             r->keep_alive = 1;
456             break;
457         }
458         break;
459       case HTTP_HEADER_CONTENT_LENGTH:
460         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
461             /*(trailing whitespace was removed from vlen)*/
462             /*(not using strtoll() since v might not be z-string)*/
463             const char *err;
464             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
465             if (err == v+vlen) {
466                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
467                 if (0 == r->reqbody_length) r->reqbody_length = clen;
468             }
469             else {
470                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
471             }
472         }
473         else {
474             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
475         }
476         break;
477       case HTTP_HEADER_TRANSFER_ENCODING:
478         if (HTTP_VERSION_1_1 != r->http_version) {
479             return http_request_header_line_invalid(r, 400,
480               HTTP_VERSION_1_0 == r->http_version
481                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
482                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
483         }
484 
485         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
486             /* Transfer-Encoding might contain additional encodings,
487              * which are not currently supported by lighttpd */
488             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
489         }
490         r->reqbody_length = -1;
491 
492         /* Transfer-Encoding is a hop-by-hop header,
493          * which must not be blindly forwarded to backends */
494         return 0; /* skip header */
495     }
496 
497     http_header_request_append(r, id, k, klen, v, vlen);
498     return 0;
499 }
500 
501 __attribute_cold__
http_request_parse_proto_loose(request_st * const restrict r,const char * const restrict ptr,const size_t len,const unsigned int http_parseopts)502 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
503     const char * proto = memchr(ptr, ' ', len);
504     if (NULL == proto)
505         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
506     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
507     if (NULL == proto)
508         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
509     ++proto;
510 
511     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
512         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
513             /* length already checked before calling this routine */
514             /* (len != (size_t)(proto - ptr + 8)) */
515             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
516                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
517             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
518         }
519         else
520             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
521     }
522     else
523         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
524 
525     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
526     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
527 
528     return 0;
529 }
530 
531 __attribute_cold__
http_request_parse_reqline_uri(request_st * const restrict r,const char * const restrict uri,const size_t len,const unsigned int http_parseopts)532 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
533     const char *nuri;
534     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
535         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
536        ||
537        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
538         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
539         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
540         const size_t hostlen = nuri - host;
541         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
542             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
543             return NULL;
544         }
545         /* Insert as "Host" header */
546         http_request_header_set_Host(r, host, hostlen);
547         return nuri;
548     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
549            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
550            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
551         /* (permitted) */
552         return uri;
553     } else {
554         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
555         return NULL;
556     }
557 }
558 
559 
560 __attribute_cold__
561 __attribute_noinline__
562 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
563 
564 
565 int
http_request_validate_pseudohdrs(request_st * const restrict r,const int scheme,const unsigned int http_parseopts)566 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
567 {
568     /* :method is required to indicate method
569      * CONNECT method must have :method and :authority
570      *   unless RFC8441 CONNECT extension, which must follow 'other' (below)
571      * All other methods must have at least :method :scheme :path */
572 
573     if (HTTP_METHOD_UNSET == r->http_method)
574         return http_request_header_line_invalid(r, 400,
575           "missing pseudo-header method -> 400");
576 
577     if (HTTP_METHOD_CONNECT != r->http_method)
578         r->h2_connect_ext = 0;
579 
580     if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)
581         || __builtin_expect( (r->h2_connect_ext != 0), 0)) {
582 
583         if (!scheme)
584             return http_request_header_line_invalid(r, 400,
585               "missing pseudo-header scheme -> 400");
586 
587         if (buffer_is_blank(&r->target))
588             return http_request_header_line_invalid(r, 400,
589               "missing pseudo-header path -> 400");
590 
591         const char * const uri = r->target.ptr;
592         if (*uri != '/') { /* (common case: (*uri == '/')) */
593             if (uri[0] != '*' || uri[1] != '\0'
594                 || HTTP_METHOD_OPTIONS != r->http_method)
595                 return http_request_header_line_invalid(r, 400,
596                   "invalid pseudo-header path -> 400");
597         }
598     }
599     else { /* HTTP_METHOD_CONNECT */
600         if (NULL == r->http_host)
601             return http_request_header_line_invalid(r, 400,
602               "missing pseudo-header authority -> 400");
603         if (!buffer_is_blank(&r->target) || scheme)
604             return http_request_header_line_invalid(r, 400,
605               "invalid pseudo-header with CONNECT -> 400");
606         /* note: this copy occurs prior to http_request_host_policy()
607          * so any consumer handling CONNECT should normalize r->target
608          * as appropriate */
609         buffer_copy_buffer(&r->target, r->http_host);
610     }
611     buffer_copy_buffer(&r->target_orig, &r->target);
612 
613     /* r->http_host, if set, is checked with http_request_host_policy()
614      * in http_request_parse() */
615 
616     /* copied and modified from end of http_request_parse_reqline() */
617 
618     /* check uri for invalid characters */
619     const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/
620     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
621       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
622           ? NULL /* URI will be checked in http_request_parse_target() */
623           : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len)
624       : http_request_check_line_minimal(r->target.ptr, len);
625     return (NULL == x)
626       ? 0
627       : http_request_header_char_invalid(r, *x,
628           "invalid character in URI -> 400");
629 }
630 
631 
632 int
http_request_parse_header(request_st * const restrict r,http_header_parse_ctx * const restrict hpctx)633 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
634 {
635     /* Note: k and v might not be '\0' terminated strings;
636      * care must be taken to avoid libc funcs which expect z-strings */
637     const char * const restrict k = hpctx->k;
638     const char * restrict v = hpctx->v;
639     const uint32_t klen = hpctx->klen;
640     uint32_t vlen = hpctx->vlen;
641 
642     if (0 == klen)
643         return http_request_header_line_invalid(r, 400,
644           "invalid header key -> 400");
645 
646     if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
647         /*(configurable with server.max-request-field-size; default 8k)*/
648       #if 1 /* emit to error log for people sending large headers */
649         log_error(r->conf.errh, __FILE__, __LINE__,
650                   "oversized request header -> 431");
651         return 431; /* Request Header Fields Too Large */
652       #else
653         /* 431 Request Header Fields Too Large */
654         return http_request_header_line_invalid(r, 431,
655           "oversized request header -> 431");
656       #endif
657     }
658 
659     if (!hpctx->trailers) {
660         if (*k == ':') {
661             /* HTTP/2 request pseudo-header fields */
662             if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
663                 return http_request_header_line_invalid(r, 400,
664                   "invalid pseudo-header -> 400");
665             if (0 == vlen)
666                 return http_request_header_line_invalid(r, 400,
667                   "invalid header value -> 400");
668 
669             /* (note: relies on implementation details using ls-hpack in h2.c)
670              * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
671              *  matches key, not also value, if lsxpack_header_t flags does not
672              *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
673              *  below indicates any method, not only "GET") */
674             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
675                 switch (klen-1) {
676                   case 4:
677                     if (0 == memcmp(k+1, "path", 4))
678                         hpctx->id = HTTP_HEADER_H2_PATH;
679                     break;
680                   case 6:
681                     if (0 == memcmp(k+1, "method", 6))
682                         hpctx->id = HTTP_HEADER_H2_METHOD_GET;
683                     else if (0 == memcmp(k+1, "scheme", 6))
684                         hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
685                     break;
686                   case 8:
687                     if (0 == memcmp(k+1, "protocol", 8))
688                         hpctx->id = HTTP_HEADER_H2_PROTOCOL;
689                     break;
690                   case 9:
691                     if (0 == memcmp(k+1, "authority", 9))
692                         hpctx->id = HTTP_HEADER_H2_AUTHORITY;
693                     break;
694                   default:
695                     break;
696                 }
697                 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
698                     return http_request_header_line_invalid(r, 400,
699                       "invalid pseudo-header -> 400");
700             }
701 
702             switch (hpctx->id) {
703               case HTTP_HEADER_H2_AUTHORITY:
704                 if (__builtin_expect( (r->http_host != NULL), 0))
705                     break;
706                 if (vlen >= 1024) /*(expecting < 256)*/
707                     return http_request_header_line_invalid(r, 400,
708                       "invalid pseudo-header authority too long -> 400");
709                 /* insert as "Host" header */
710                 http_request_header_set_Host(r, v, vlen);
711                 return 0;
712               case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
713               case HTTP_HEADER_H2_METHOD_POST:
714                 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
715                     break;
716                 r->http_method = get_http_method_key(v, vlen);
717                 if (HTTP_METHOD_UNSET >= r->http_method)
718                     return http_request_header_line_invalid(r, 501,
719                       "unknown http-method -> 501");
720                 return 0;
721               case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
722               case HTTP_HEADER_H2_PATH_INDEX_HTML:
723                 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
724                     break;
725                 buffer_copy_string_len(&r->target, v, vlen);
726                 return 0;
727               case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
728               case HTTP_HEADER_H2_SCHEME_HTTPS:
729                 if (__builtin_expect( (hpctx->scheme), 0))
730                     break;
731                 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
732                 return 0;
733                #if 0
734                 switch (vlen) {/*(validated, but then ignored)*/
735                   case 5: /* "https" */
736                     if (v[4]!='s') break;
737                     __attribute_fallthrough__
738                   case 4: /* "http" */
739                     if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
740                         hpctx->scheme = 1;
741                         return 0;
742                     }
743                     break;
744                   default:
745                     break;
746                 }
747                 return http_request_header_line_invalid(r, 400,
748                   "unknown pseudo-header scheme -> 400");
749                #endif
750               case HTTP_HEADER_H2_PROTOCOL:
751                 /* support only ":protocol: websocket" for now */
752                 if (vlen != 9 || 0 != memcmp(v, "websocket", 9))
753                     return http_request_header_line_invalid(r, 405,
754                       "unhandled :protocol value -> 405");
755                 /*(future: might be enum of recognized :protocol: ext values)*/
756                 r->h2_connect_ext = 1;
757                 return 0;
758               default:
759                 return http_request_header_line_invalid(r, 400,
760                   "invalid pseudo-header -> 400");
761             }
762             return http_request_header_line_invalid(r, 400,
763               "repeated pseudo-header -> 400");
764         }
765         else { /*(non-pseudo headers)*/
766             if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
767                 hpctx->pseudo = 0;
768                 int status =
769                   http_request_validate_pseudohdrs(r, hpctx->scheme,
770                                                    hpctx->http_parseopts);
771                 if (0 != status) return status;
772             }
773             if (0 == vlen)
774                 return 0;
775 
776             const unsigned int http_header_strict =
777               (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
778 
779             const char * const x = (http_header_strict)
780               ? http_request_check_line_strict(v, vlen)
781               : http_request_check_line_minimal(v, vlen);
782             if (x)
783                 return http_request_header_char_invalid(r, *x,
784                   "invalid character in header -> 400");
785 
786             /* remove leading and trailing whitespace (strict RFC conformance)*/
787             if (__builtin_expect( (*v <= 0x20), 0)) {
788                 while ((*v == ' ' || *v == '\t') && (++v, --vlen)) ;
789                 if (0 == vlen)
790                     return 0;
791             }
792             if (__builtin_expect( (v[vlen-1] <= 0x20), 0)) {
793                 while (v[vlen-1] == ' ' || v[vlen-1] == '\t') --vlen;
794             }
795 
796             if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
797                 uint32_t j = 0;
798                 while (j < klen && (light_islower(k[j]) || k[j] == '-'))
799                     ++j;
800 
801                 if (__builtin_expect( (j != klen), 0)) {
802                     if (light_isupper(k[j]))
803                         return 400;
804                     if (0 != http_request_parse_header_other(r, k+j, klen-j,
805                                                             http_header_strict))
806                         return 400;
807                 }
808 
809                 hpctx->id = http_header_hkey_get_lc(k, klen);
810             }
811 
812             const enum http_header_e id = (enum http_header_e)hpctx->id;
813 
814             if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
815                 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
816                 return http_request_header_line_invalid(r, 400,
817                   "invalid TE header value with HTTP/2 -> 400");
818 
819             return http_request_parse_single_header(r, id, k, klen, v, vlen);
820         }
821     }
822     else { /*(trailers)*/
823         if (*k == ':')
824             return http_request_header_line_invalid(r, 400,
825               "invalid pseudo-header in trailers -> 400");
826         /* ignore trailers (after required HPACK decoding) if streaming
827          * request body to backend since headers have already been sent
828          * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
829          * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
830          * support using HTTP/2 to connect to backends) */
831       #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
832         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
833             return 0;
834       #endif
835         /* Note: do not unconditionally merge into headers since if
836          * headers had already been sent to backend, then mod_accesslog
837          * logging of request headers might be inaccurate.
838          * Many simple backends do not support HTTP/1.1 requests sending
839          * Transfer-Encoding: chunked, and even those that do might not
840          * handle trailers.  Some backends do not even support HTTP/1.1.
841          * For all these reasons, ignore trailers if streaming request
842          * body to backend.  Revisit in future if adding support for
843          * connecting to backends using HTTP/2 (with explicit config
844          * option to force connecting to backends using HTTP/2) */
845 
846         /* XXX: TODO: request trailers not handled if streaming reqbody
847          * XXX: must ensure that trailers are not disallowed field-names
848          */
849 
850       #if 0
851         if (0 == vlen)
852             return 0;
853       #endif
854 
855         return 0;
856     }
857 }
858 
859 
/**
 * Parse the HTTP/1.x request line: "<method> <uri> <protocol>\r\n"
 *
 * On success, sets r->http_version, r->keep_alive, r->http_method,
 * r->target and r->target_orig.
 *
 * r               request being populated
 * ptr             start of request header block (request line comes first)
 * hoff            line offsets; hoff[2] is used as the length of the request
 *                 line including its line terminator, and hoff[hoff[0]] as
 *                 the number of bytes scanned for '\0' when not in strict mode
 * http_parseopts  HTTP_PARSEOPT_* flags
 *
 * Returns 0 on success; otherwise the non-zero result of the error helper
 * (or of http_request_parse_proto_loose()), or 400 if the request-target
 * could not be parsed by http_request_parse_reqline_uri().
 */
static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    size_t len = hoff[2];

    /* parse the first line of the request
     * <method> <uri> <protocol>\r\n
     * */
    if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
        return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
    /* strip line terminator: "\r\n", or bare "\n" only if not strict */
    if (ptr[len-2] == '\r')
        len-=2;
    else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
        len-=1;
    else
        return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");

    /*
     * RFC7230:
     *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
     *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
     */

    /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line;
     * the last 8 chars are copied into a union so that the comparison
     * against each expected version string is a single 64-bit compare */
    union proto_un {
      char c[8];
      uint64_t u;
    };
    static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
    static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
    const char *p = ptr + len - 8;
    union proto_un proto8;
    proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
    proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
    if (p[-1] == ' ' && http_1_1.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_1;
        r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
    }
    else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_0;
        r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
    }
    else {
        int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
        if (0 != status) return status;
        /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
        for (p = ptr + len - 9; p[-1] != ' '; --p) ;
    }
    /* p now points at the first char of the protocol; p[-1] is a space */

    /* method is expected to be a short string in the general case
     * (no bounds check: space must exist if protocol was parsed successfully)*/
    size_t i = 0;
    while (ptr[i] != ' ') ++i;

    r->http_method = get_http_method_key(ptr, i);
    if (HTTP_METHOD_UNSET >= r->http_method)
        return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");

    const char *uri = ptr + i + 1;

    /* the space after the method is the same space that precedes protocol */
    if (uri == p)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
    len = (size_t)(p - uri - 1);

    if (*uri != '/') { /* (common case: (*uri == '/')) */
        uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
        if (NULL == uri) return 400;
        len = (size_t)(p - uri - 1);
    }

    if (0 == len)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");

    /* check uri for invalid characters */     /* http_header_strict */
    const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
      ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
          ? NULL /* URI will be checked in http_request_parse_target() */
          : http_request_check_uri_strict((const uint8_t *)uri, len)
      : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */
    if (x) /* must return: otherwise the helper's result is discarded
            * and the invalid request line is accepted */
        return http_request_header_char_invalid(r, *x, "invalid character in URI -> 400");

    buffer_copy_string_len(&r->target, uri, len);
    buffer_copy_string_len(&r->target_orig, uri, len);
    return 0;
}
948 
/**
 * Split r->target into r->uri.scheme, r->uri.path and r->uri.query.
 *
 * The URI is parsed into components at start of request and may also be
 * re-parsed upon HANDLER_COMEBACK during the request.  r->target is
 * expected to be "/url-part?query-part" (and *not* a fully-qualified URI
 * starting https://...).  r->uri.authority is expected to be parsed
 * elsewhere into r->http_host.
 *
 * Component names follow RFC 2396:
 *   (scheme)://(authority)(path)?(query)#fragment
 *
 * Returns 0 on success, else the result of the error helper.
 */
int http_request_parse_target(request_st * const r, int scheme_port) {
    /* take initial scheme value from connection-level state
     * (request r->uri.scheme can be overwritten for later,
     *  for example by mod_extforward or mod_magnet)
     * ("https" truncated to 4 chars yields "http") */
    buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);

    buffer * const target = &r->target;

    /* CONNECT ... (or) OPTIONS * ... : target is used verbatim as the path */
    int verbatim =
      (r->http_method == HTTP_METHOD_CONNECT && !r->h2_connect_ext);
    if (!verbatim && r->http_method == HTTP_METHOD_OPTIONS)
        verbatim = (target->ptr[0] == '*' && target->ptr[1] == '\0');
    if (verbatim) {
        buffer_copy_buffer(&r->uri.path, target);
        buffer_clear(&r->uri.query);
        return 0;
    }

    /* locate '?' which begins the query-string (NULL if there is none) */
    char *query = NULL;
    if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
        /*uint32_t len = buffer_clen(target);*/
        const int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
        if (-2 == qs)
            return http_request_header_line_invalid(r, 400,
              "invalid character in URI -> 400"); /* Bad Request */
        if (-1 != qs)
            query = target->ptr + qs;
      #if 0  /* future: might enable here, or below for all requests */
        /* (Note: total header size not recalculated on HANDLER_COMEBACK
         *  even if other request headers changed during processing)
         * (If (0 != r->loops_per_request), then the generated
         *  request is too large.  Should a different error be returned?) */
        r->rqst_header_len -= len;
        len = buffer_clen(target);
        r->rqst_header_len += len;
        if (len > MAX_HTTP_REQUEST_URI) {
            return 414; /* 414 URI Too Long */
        }
        if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
            log_error(r->conf.errh, __FILE__, __LINE__,
              "request header fields too large: %u -> 431",
              r->rqst_header_len);
            return 431; /* Request Header Fields Too Large */
        }
      #endif
    }
    else {
        size_t tlen = buffer_clen(target);
        char * const frag = memchr(target->ptr, '#', tlen);
        if (frag) { /* discard fragment */
            tlen = (size_t)(frag - target->ptr);
            buffer_truncate(target, tlen);
        }
        query = memchr(target->ptr, '?', tlen);
    }

    /* extract query string from target; remainder is the (encoded) path */
    const char * const base = target->ptr;
    const uint32_t total = buffer_clen(target);
    const uint32_t path_len = (NULL != query) ? (uint32_t)(query - base) : total;
    if (NULL != query)
        buffer_copy_string_len(&r->uri.query, query + 1, total - path_len - 1);
    else
        buffer_clear(&r->uri.query);
    buffer_copy_string_len(&r->uri.path, base, path_len);

    /* decode url to path
     *
     * - decode url-encodings  (e.g. %20 -> ' ')
     * - remove path-modifiers (e.g. /../)
     */
    buffer_urldecode_path(&r->uri.path);
    buffer_path_simplify(&r->uri.path);

    if (r->uri.path.ptr[0] == '/')
        return 0;

    return http_request_header_line_invalid(r, 400,
      "uri-path does not begin with '/' -> 400"); /* Bad Request */
}
1058 
1059 __attribute_cold__
1060 __attribute_noinline__
http_request_parse_header_other(request_st * const restrict r,const char * const restrict k,const int klen,const unsigned int http_header_strict)1061 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
1062     for (int i = 0; i < klen; ++i) {
1063         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1064         /**
1065          * 1*<any CHAR except CTLs or separators>
1066          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1067          *
1068          */
1069         switch(k[i]) {
1070         case ' ':
1071         case '\t':
1072             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1073         case '\r':
1074         case '\n':
1075         case '(':
1076         case ')':
1077         case '<':
1078         case '>':
1079         case '@':
1080         case ',':
1081         case ':':
1082         case ';':
1083         case '\\':
1084         case '\"':
1085         case '/':
1086         case '[':
1087         case ']':
1088         case '?':
1089         case '=':
1090         case '{':
1091         case '}':
1092             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1093         default:
1094             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1095                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1096             break; /* ok */
1097         }
1098     }
1099     return 0;
1100 }
1101 
/**
 * Parse the HTTP/1.x header lines which follow the request line.
 *
 * Each line ptr+hoff[i] .. ptr+hoff[i+1] (for i >= 2) is split at the first
 * ':' into field-name and field-value.  Folded continuation lines (lines
 * beginning with SP or HTAB) are joined to the preceding header in-place by
 * overwriting the intervening line terminator with spaces.  Each non-empty
 * header is handed to http_request_parse_single_header().
 *
 * Returns 0 on success, else a non-zero status.
 */
static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    const unsigned int strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);

    /*(WS at the start of the first header line is not checked here;
     * will later result in invalid label for HTTP header)*/

    for (int i = 2; i < hoff[0]; ++i) {
        const char * const key = ptr + hoff[i];
        /* one past last line hoff[hoff[0]] is to final "\r\n" */
        char *eol = ptr + hoff[i+1];

        const char *sep = memchr(key, ':', eol - key);
        if (NULL == sep)
            return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");

        const char *val = sep + 1;

        /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
         * 3.2.4.  Field Parsing
         * [...]
         * No whitespace is allowed between the header field-name and colon.  In
         * the past, differences in the handling of such whitespace have led to
         * security vulnerabilities in request routing and response handling.  A
         * server MUST reject any received request message that contains
         * whitespace between a header field-name and colon with a response code
         * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
         * response message before forwarding the message downstream.
         */
        /* (line key[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check sep != key) */
        if (sep[-1] == ' ' || sep[-1] == '\t') {
            if (strict)
                return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
            /* remove trailing whitespace from key (if !http_header_strict) */
            --sep;
            while (sep[-1] == ' ' || sep[-1] == '\t') --sep;
        }

        const int klen = (int)(sep - key);
        if (0 == klen)
            return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
        const enum http_header_e id = http_header_hkey_get(key, klen);

        if (id == HTTP_HEADER_OTHER) {
            /* skip leading run of common chars; validate remainder (if any) */
            int j = 0;
            while (j < klen && (light_isalpha(key[j]) || key[j] == '-'))
                ++j;
            if (j < klen
                && 0 != http_request_parse_header_other(r, key+j, klen-j, strict))
                return 400;
        }

        /* remove leading whitespace from value */
        while (*val == ' ' || *val == '\t') ++val;

        /* line folding: absorb following lines which begin with SP or HTAB */
        while (i+1 <= hoff[0]) {
            eol = ptr + hoff[i+1];
            if (!(eol[0] == ' ' || eol[0] == '\t')) break;

          #ifdef __COVERITY__
            force_assert(eol - key >= 2);
          #endif
            if (eol[-2] == '\r')
                eol[-2] = ' ';
            else if (strict)
                return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
            eol[-1] = ' ';
            ++i;
        }
      #ifdef __COVERITY__
        /*(buf holding key has non-zero request-line, so eol[-2] valid)*/
        force_assert(eol >= key + 2);
      #endif
        if (eol[-2] == '\r')
            --eol;
        else if (strict)
            return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
        /* remove trailing whitespace from value (+ remove '\r\n') */
        /* (line key[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check (eol != key)) */
        --eol;
        while (eol[-1] == ' ' || eol[-1] == '\t') --eol;

        const int vlen = (int)(eol - val);
        /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
        if (vlen <= 0) continue; /* ignore header */

        if (strict) {
            const char * const bad = http_request_check_line_strict(val, vlen);
            if (bad)
                return http_request_header_char_invalid(r, *bad,
                  "invalid character in header -> 400");
        } /* else URI already checked in http_request_parse_reqline() for any '\0' */

        const int rc = http_request_parse_single_header(r, id, key, (size_t)klen, val, (size_t)vlen);
        if (0 != rc) return rc;
    }

    return 0;
}
1210 
1211 
1212 static int
http_request_parse(request_st * const restrict r,const int scheme_port)1213 http_request_parse (request_st * const restrict r, const int scheme_port)
1214 {
1215     int status = http_request_parse_target(r, scheme_port);
1216     if (0 != status) return status;
1217 
1218     /* post-processing */
1219     const unsigned int http_parseopts = r->conf.http_parseopts;
1220 
1221     /* check hostname field if it is set */
1222     /*(r->http_host might not be set until after parsing request headers)*/
1223     if (__builtin_expect( (r->http_host != NULL), 1)) {
1224         if (0 != http_request_host_policy(r->http_host,
1225                                           http_parseopts, scheme_port))
1226             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1227         buffer_copy_buffer(&r->uri.authority, r->http_host);
1228     }
1229     else {
1230         buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1231         if (r->http_version >= HTTP_VERSION_1_1)
1232             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1233     }
1234 
1235     if (HTTP_VERSION_1_1 != r->http_version
1236         && (r->rqst_htags
1237             & (light_bshift(HTTP_HEADER_UPGRADE)
1238               |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1239         return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1240     }
1241 
1242     if (0 == r->reqbody_length) {
1243         /* POST requires Content-Length (or Transfer-Encoding)
1244          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1245         if (HTTP_METHOD_POST == r->http_method
1246             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1247             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1248         }
1249     }
1250     else {
1251         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1252         if (-1 == r->reqbody_length
1253             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1254             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1255              * 3.3.3.  Message Body Length
1256              * [...]
1257              * If a message is received with both a Transfer-Encoding and a
1258              * Content-Length header field, the Transfer-Encoding overrides the
1259              * Content-Length.  Such a message might indicate an attempt to
1260              * perform request smuggling (Section 9.5) or response splitting
1261              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1262              * remove the received Content-Length field prior to forwarding such
1263              * a message downstream.
1264              */
1265             const unsigned int http_header_strict =
1266               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1267             if (http_header_strict) {
1268                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1269             }
1270             else {
1271                 /* ignore Content-Length */
1272                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1273             }
1274         }
1275         if (http_method_get_or_head(r->http_method)
1276             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1277             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1278         }
1279     }
1280 
1281     return 0;
1282 }
1283 
1284 
1285 static int
http_request_parse_hoff(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)1286 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1287 {
1288     /*
1289      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1290      * Header : "^([-a-zA-Z]+): (.+)$"
1291      * End    : "^$"
1292      */
1293 
1294     int status;
1295     const unsigned int http_parseopts = r->conf.http_parseopts;
1296 
1297     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1298     if (0 != status) return status;
1299 
1300     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1301     if (0 != status) return status;
1302 
1303     return http_request_parse(r, scheme_port);
1304 }
1305 
1306 
1307 static void
http_request_headers_fin(request_st * const restrict r)1308 http_request_headers_fin (request_st * const restrict r)
1309 {
1310     if (0 == r->http_status) {
1311       #if 0
1312         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1313                                 | (1 << COMP_HTTP_SCHEME)
1314                                 | (1 << COMP_HTTP_HOST)
1315                                 | (1 << COMP_HTTP_REMOTE_IP)
1316                                 | (1 << COMP_HTTP_REQUEST_METHOD)
1317                                 | (1 << COMP_HTTP_URL)
1318                                 | (1 << COMP_HTTP_QUERY_STRING)
1319                                 | (1 << COMP_HTTP_REQUEST_HEADER);
1320       #else
1321         /* all config conditions are valid after parsing header
1322          * (set all bits; remove dependency on plugin_config.h) */
1323         r->conditional_is_valid = ~0u;
1324       #endif
1325     }
1326     else {
1327         r->keep_alive = 0;
1328         r->reqbody_length = 0;
1329     }
1330 }
1331 
1332 
1333 void
http_request_headers_process(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)1334 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1335 {
1336     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1337 
1338     http_request_headers_fin(r);
1339 
1340     if (__builtin_expect( (0 != r->http_status), 0)) {
1341         if (r->conf.log_request_header_on_error) {
1342             /*(http_request_parse_headers() modifies hdrs only to
1343              * undo line-wrapping in-place using spaces)*/
1344             log_error_multiline(r->conf.errh, __FILE__, __LINE__,
1345                                 hdrs, r->rqst_header_len, "rqst: ");
1346         }
1347     }
1348 }
1349 
1350 
1351 void
http_request_headers_process_h2(request_st * const restrict r,const int scheme_port)1352 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1353 {
1354     if (0 == r->http_status)
1355         r->http_status = http_request_parse(r, scheme_port);
1356 
1357     if (0 == r->http_status) {
1358         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1359             r->http_status = http_request_header_line_invalid(r, 400,
1360               "invalid Connection header with HTTP/2 -> 400");
1361     }
1362 
1363     http_request_headers_fin(r);
1364 
1365     /* limited; headers not collected into a single buf for HTTP/2 */
1366     if (__builtin_expect( (0 != r->http_status), 0)) {
1367         if (r->conf.log_request_header_on_error) {
1368             log_error(r->conf.errh, __FILE__, __LINE__,
1369               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1370               r->http_host ? r->http_host->ptr : "",
1371               http_method_buf(r->http_method)->ptr,
1372               !buffer_is_blank(&r->target) ? r->target.ptr : "");
1373         }
1374     }
1375 
1376     /* ignore Upgrade if using HTTP/2 */
1377     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1378         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1379                                   CONST_STR_LEN("upgrade"));
1380     /* XXX: should filter out other hop-by-hop connection headers, too */
1381 }
1382