xref: /lighttpd1.4/src/request.c (revision f2610d23)
/*
 * request - HTTP request processing
 *
 * Fully-rewritten from original
 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
 * License: BSD 3-clause (same as lighttpd)
 */
88abd06a7SGlenn Strauss #include "first.h"
98abd06a7SGlenn Strauss 
1022e8b456SStefan Bühler #include "request.h"
113eb7902eSGlenn Strauss #include "burl.h"
123dd3cde9SGlenn Strauss #include "http_header.h"
13c56b2108SGlenn Strauss #include "http_kv.h"
1422e8b456SStefan Bühler #include "log.h"
151367f606SGlenn Strauss #include "sock_addr.h"
1622e8b456SStefan Bühler 
17bcdc6a3bSJan Kneschke #include <limits.h>
185547530aSGlenn Strauss #include <stdint.h>
19bcdc6a3bSJan Kneschke #include <stdlib.h>
20bcdc6a3bSJan Kneschke #include <string.h>
21bcdc6a3bSJan Kneschke 
225e0d82dcSGlenn Strauss 
235e0d82dcSGlenn Strauss __attribute_cold__
245e0d82dcSGlenn Strauss __attribute_noinline__
255e0d82dcSGlenn Strauss void
http_request_state_append(buffer * const b,request_state_t state)265e0d82dcSGlenn Strauss http_request_state_append (buffer * const b, request_state_t state)
275e0d82dcSGlenn Strauss {
285e0d82dcSGlenn Strauss     static const struct sn { const char *s; uint32_t n; } states[] = {
295e0d82dcSGlenn Strauss       { CONST_STR_LEN("connect") }
305e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("req-start") }
315e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("read") }
325e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("req-end") }
335e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("readpost") }
345e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("handle-req") }
355e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("resp-start") }
365e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("write") }
375e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("resp-end") }
385e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("error") }
395e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("close") }
405e0d82dcSGlenn Strauss      ,{ CONST_STR_LEN("(unknown)") }
415e0d82dcSGlenn Strauss     };
425e0d82dcSGlenn Strauss     const struct sn * const p =
435e0d82dcSGlenn Strauss       states +((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
445e0d82dcSGlenn Strauss     buffer_append_string_len(b, p->s, p->n);
455e0d82dcSGlenn Strauss }
465e0d82dcSGlenn Strauss 
475e0d82dcSGlenn Strauss __attribute_cold__
485e0d82dcSGlenn Strauss __attribute_noinline__
495e0d82dcSGlenn Strauss __attribute_pure__
505e0d82dcSGlenn Strauss const char *
http_request_state_short(request_state_t state)515e0d82dcSGlenn Strauss http_request_state_short (request_state_t state)
525e0d82dcSGlenn Strauss {
535e0d82dcSGlenn Strauss     /*((char *) returned, but caller must use only one char)*/
545e0d82dcSGlenn Strauss     static const char sstates[] = ".qrQRhsWSECx";
555e0d82dcSGlenn Strauss     return
565e0d82dcSGlenn Strauss       sstates+((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
575e0d82dcSGlenn Strauss }
585e0d82dcSGlenn Strauss 
595e0d82dcSGlenn Strauss 
__attribute_noinline__
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) {
    /* Scan len bytes of s and return pointer to the first byte which is
     * a control char or space (<= 32), DEL (127), or 0xFF (255).
     * Returns NULL if no such byte is found. */
    const uint8_t * const end = s + len;
    for (const uint8_t *cur = s; cur != end; ++cur) {
        const uint8_t c = *cur;
        if (__builtin_expect( (c <= 32 || c == 127 || c == 255), 0))
            return (const char *)cur;
    }
    return NULL;
}
71438daddeSGlenn Strauss 
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) {
    /* Scan len bytes of s and return pointer to the first byte which is
     * a control char (< 32) other than horizontal tab, or is DEL (127).
     * Returns NULL if no such byte is found. */
    const char * const end = s + len;
    for (const char *cur = s; cur != end; ++cur) {
        /*(examine as unsigned so that bytes >= 128 are not seen as < 32)*/
        const uint8_t c = *(const uint8_t *)cur;
        if (__builtin_expect( (c < 32), 0)) {
            if (c != '\t') return cur;
        }
        else if (__builtin_expect( (c == 127), 0))
            return cur;
    }
    return NULL;
}
83438daddeSGlenn Strauss 
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) {
    /* Scan len bytes of s and return pointer to the first NUL, CR, or LF.
     * Returns NULL if none of those bytes is found. */
    const char * const end = s + len;
    for (const char *cur = s; cur != end; ++cur) {
        switch (*cur) {
          case '\0':
          case '\r':
          case '\n':
            return cur;
          default:
            break;
        }
    }
    return NULL;
}
94438daddeSGlenn Strauss 
static int request_check_hostname(buffer * const host) {
    /*
     * Strict syntax check of "host[:port]".  Returns 0 if valid, else -1.
     * May modify host: a single trailing '.' on the hostname is removed,
     * and a trailing ':' with no port digits is removed.
     *
     *       hostport      = host [ ":" port ]
     *       host          = hostname | IPv4address | IPv6address
     *       hostname      = *( domainlabel "." ) toplabel [ "." ]
     *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
     *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
     *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
     *       IPv6address   = "[" ... "]"
     *       port          = *digit
     */

    const char *cur = host->ptr;

    if (*cur == '[') {  /* IPv6 address */
        /* check the address inside [...]; note: not fully validating */
        /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
        int ncolons = 0;
        for (++cur; ; ++cur) { /* (begin one past '[') */
            if (light_isxdigit(*cur) || *cur == '.') continue;
            if (*cur == ':' && ++ncolons < 8) continue;
            break;
        }
        /*(invalid char, too many ':', missing ']', or empty "[]")*/
        if (*cur != ']' || cur - host->ptr == 1) return -1;
        ++cur; /* step past ']' */
    }
    else {
        uint32_t total = buffer_clen(host);
        const char * const port_sep = memchr(cur, ':', total);
        uint32_t namelen = port_sep ? (uint32_t)(port_sep - cur) : total;

        if (__builtin_expect( (0 == namelen), 0)) return -1;

        /* if hostname ends in ".", strip it */
        if (__builtin_expect( (cur[namelen-1] == '.'), 0)) {
            --namelen;
            if (0 == namelen) return -1;
            --total;
            /* shift port info one left */
            if (NULL != port_sep)
                memmove(host->ptr+namelen, port_sep, total - namelen);
            buffer_truncate(host, total);
        }

        int seglen = 0;      /* chars seen in current label */
        int ndots = 0;       /* label separators seen */
        int seg_numeric = 1; /* current label is all digits (so far) */
        int all_numeric = 1; /* all completed labels were all digits */
        for (uint32_t i = 0; i < namelen; ++i) {
            const int ch = cur[i];
            ++seglen;
            if (light_isdigit(ch))
                continue;
            if (light_isalpha(ch) || (ch == '-' && i != 0)) {
                seg_numeric = 0;
                continue;
            }
            /* anything else must be '.' which ends a non-empty label
             * and which is not immediately followed by '-' */
            if (ch != '.' || 1 == seglen || '-' == cur[i+1])
                return -1;
            all_numeric &= seg_numeric;
            seg_numeric = 1;
            seglen = 0;
            ++ndots;
        }
        if (0 == seglen) return -1; /*(empty final label)*/
        /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
        if (seg_numeric && (ndots != 3 || !all_numeric)) return -1;

        cur += namelen;
    }

    /* check numerical port, if present */
    if (*cur == ':') {
        if (__builtin_expect( (cur[1] == '\0'), 0)) /*(remove trailing colon)*/
            buffer_truncate(host, cur - host->ptr);
        ++cur;
        while (light_isdigit(*cur)) ++cur;
    }

    /* anything left over after host and optional port is invalid */
    if (*cur != '\0') return -1;
    return 0;
}
171bcdc6a3bSJan Kneschke 
int http_request_host_normalize(buffer * const b, const int scheme_port) {
    /*
     * check for and canonicalize numeric IP address and portnum (optional)
     * (IP address may be followed by ":portnum" (optional))
     * - IPv6: "[...]"
     * - IPv4: "x.x.x.x"
     * - IPv4: 12345678   (32-bit decimal number)
     * - IPv4: 012345678  (32-bit octal number)
     * - IPv4: 0x12345678 (32-bit hex number)
     *
     * allow any chars (except ':' and '\0' and stray '[' or ']')
     *   (other code may check chars more strictly or more pedantically)
     * ':'  delimits (optional) port at end of string
     * "[]" wraps IPv6 address literal
     * '\0' should have been rejected earlier were it present
     *
     * any chars includes, but is not limited to:
     * - allow '-' any where, even at beginning of word
     *     (security caution: might be confused for cmd flag if passed to shell)
     * - allow all-digit TLDs
     *     (might be mistaken for IPv4 addr by inet_aton()
     *      unless non-digits appear in subdomain)
     *
     * b is rewritten in place.  The port is re-appended (in decimal) only if
     * one was given and it differs from scheme_port.
     *
     * returns 0 on success, -1 if host or port is invalid
     */

    /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
     * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
     * Not using inet_pton() (when available) on IPv4 for similar reasons. */

    const char * const p = b->ptr;
    const size_t blen = buffer_clen(b);
    long port = 0;

    if (*p != '[') {
        char * const colon = (char *)memchr(p, ':', blen);
        if (colon) {
            if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
            if (colon[1] != '\0') {
                char *e;
                port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
                if (0 < port && port <= USHRT_MAX && *e == '\0') {
                    /* valid port */
                } else {
                    return -1;
                }
            } /*(else ignore stray colon at string end)*/
            buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
        }

        if (light_isdigit(*p)) do {
            /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
            /* (check one-element cache of normalized IPv4 address string) */
            static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
            size_t n = colon ? (size_t)(colon - p) : blen;
            sock_addr addr;
            /*(n is at least 1 here, so an empty cache (laddr.n == 0)
             * can not produce a false match)*/
            if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
            if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
                sock_addr_inet_ntop_copy_buffer(b, &addr);
                n = buffer_clen(b);
                if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
            }
        } while (0);
    } else do { /* IPv6 addr */
      #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)

        /* (check one-element cache of normalized IPv6 address string) */
        static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
        sock_addr addr;
        char *bracket = b->ptr+blen-1;
        char *percent = strchr(b->ptr+1, '%');
        size_t len;
        int rc;
        char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
        if (blen <= 2) return -1; /*(invalid "[]")*/
        if (*bracket != ']') {
            /* string does not end with ']'; expect "]:port" or "]:" */
            bracket = (char *)memchr(b->ptr+1, ']', blen-1);
            if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
               return -1;
            }
            if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
                char *e;
                port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
                if (0 < port && port <= USHRT_MAX && *e == '\0') {
                    /* valid port */
                } else {
                    return -1;
                }
            }
        }

        /* length of address between '[' and ('%' or ']') */
        len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
        /* An empty address (e.g. "[%eth0]") must never be treated as a cache
         * hit: laddr is zero-initialized, so laddr.n == 0 until populated and
         * memcmp() of 0 bytes compares equal.  Require non-zero len so that
         * such input falls through to sock_addr_inet_pton() and is rejected. */
        if (0 != len
            && laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
            /* truncate after ']' and re-add normalized port, if needed */
            buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
            break;
        }

        *bracket = '\0';/*(terminate IPv6 string)*/
        if (percent) *percent = '\0'; /*(remove %interface from address)*/
        rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
        if (percent) *percent = '%'; /*(restore %interface)*/
        *bracket = ']'; /*(restore bracket)*/
        if (1 != rc) return -1;

        sock_addr_inet_ntop(&addr, buf, sizeof(buf));
        len = strlen(buf);
        if (percent) {
            if (percent > bracket) return -1;
            if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
            if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
            /* re-attach "%interface" after the normalized address */
            memcpy(buf+len, percent, (size_t)(bracket - percent));
            len += (size_t)(bracket - percent);
        }
        buffer_truncate(b, 1); /* truncate after '[' */
        buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));

      #else

        return -1;

      #endif
    } while (0);

    if (0 != port && port != scheme_port) {
        buffer_append_char(b, ':');
        buffer_append_int(b, (int)port);
    }

    return 0;
}
301b47494d4SGlenn Strauss 
int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
    /* Apply configured host checks to b.
     * returns 0 if b passes policy, non-zero (1) if b is rejected.
     *
     * caller should lowercase, as is done in http_request_header_set_Host(),
     * for consistency in case the value is used prior to calling policy func */
    /*buffer_to_lower(b);*/

    /* step 1: syntax check; strict hostname check if enabled,
     *         else minimal check rejecting NUL, CR, LF */
    if (http_parseopts & HTTP_PARSEOPT_HOST_STRICT) {
        if (0 != request_check_hostname(b))
            return 1;
    }
    else if (NULL != http_request_check_line_minimal(BUF_PTR_LEN(b)))
        return 1;

    /* step 2: normalize host (only if enabled) */
    if (http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
        return (0 != http_request_host_normalize(b, scheme_port));

    return 0;
}
3121104afc4SGlenn Strauss 
313e5d61e9aSGlenn Strauss __attribute_cold__
314e5d61e9aSGlenn Strauss __attribute_noinline__
http_request_header_line_invalid(request_st * const restrict r,const int status,const char * const restrict msg)3159914bb29SGlenn Strauss static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
3167c7f8c46SGlenn Strauss     if (r->conf.log_request_header_on_error) {
3177c7f8c46SGlenn Strauss         if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
318fa1eef00SGlenn Strauss     }
319e5d61e9aSGlenn Strauss     return status;
320e5d61e9aSGlenn Strauss }
321e5d61e9aSGlenn Strauss 
322e5d61e9aSGlenn Strauss __attribute_cold__
323e5d61e9aSGlenn Strauss __attribute_noinline__
http_request_header_char_invalid(request_st * const restrict r,const char ch,const char * const restrict msg)3249914bb29SGlenn Strauss static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
3257c7f8c46SGlenn Strauss     if (r->conf.log_request_header_on_error) {
326e5d61e9aSGlenn Strauss         if ((unsigned char)ch > 32 && ch != 127) {
3277c7f8c46SGlenn Strauss             log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
328e5d61e9aSGlenn Strauss         }
329e5d61e9aSGlenn Strauss         else {
3307c7f8c46SGlenn Strauss             log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
331e5d61e9aSGlenn Strauss         }
332e5d61e9aSGlenn Strauss     }
333e5d61e9aSGlenn Strauss     return 400;
334fa1eef00SGlenn Strauss }
335fa1eef00SGlenn Strauss 
3365547530aSGlenn Strauss 
3378c5acaaeSGlenn Strauss __attribute_noinline__
http_request_header_set_Host(request_st * const restrict r,const char * const h,size_t hlen)3388c5acaaeSGlenn Strauss static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
3398c5acaaeSGlenn Strauss {
3408c5acaaeSGlenn Strauss     r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
3418c5acaaeSGlenn Strauss                                                CONST_STR_LEN("Host"));
342f490078dSGlenn Strauss     buffer_copy_string_len_lc(r->http_host, h, hlen);
3438c5acaaeSGlenn Strauss }
3448c5acaaeSGlenn Strauss 
3458c5acaaeSGlenn Strauss 
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    /* Base 10 parse of at most vlen chars of v, accepting only digits 0-9.
     * - no sign and no whitespace accepted (negative numbers are rejected)
     * - values > INT64_MAX are treated as an error
     * - errno is not set; *err is set to the position where parsing stopped,
     *   so caller detects error if *err != v+vlen upon return
     * - (caller must check 0 == vlen if that is to be an error for caller)
     */
    const uint8_t * const bytes = (const uint8_t *)v;
    int64_t acc = 0;
    uint32_t pos = 0;
    while (pos < vlen) {
        /*(unsigned subtraction; chars below '0' wrap to values > 9)*/
        const uint8_t digit = (uint8_t)(bytes[pos] - '0');
        if (digit > 9 || acc > INT64_MAX/10) break; /*(non-digit or overflow)*/
        acc *= 10;
        if (acc > INT64_MAX - digit) break;         /*(overflow on add)*/
        acc += digit;
        ++pos;
    }
    *err = v + pos;
    return acc;
}
3665547530aSGlenn Strauss 
3675547530aSGlenn Strauss 
368a3b76ed5SGlenn Strauss __attribute_cold__
http_request_parse_duplicate(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)369a3b76ed5SGlenn Strauss static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
370a3b76ed5SGlenn Strauss     /* Proxies sometimes send dup headers
371a3b76ed5SGlenn Strauss      * if they are the same we ignore the second
372a3b76ed5SGlenn Strauss      * if not, we raise an error */
373a3b76ed5SGlenn Strauss     const buffer * const vb = http_header_request_get(r, id, k, klen);
374a3b76ed5SGlenn Strauss     if (vb && buffer_eq_icase_slen(vb, v, vlen))
375a3b76ed5SGlenn Strauss         return 0; /* ignore header; matches existing header */
376a3b76ed5SGlenn Strauss 
377a3b76ed5SGlenn Strauss     const char *errmsg;
378a3b76ed5SGlenn Strauss     switch (id) {
379a3b76ed5SGlenn Strauss       case HTTP_HEADER_HOST:
380a3b76ed5SGlenn Strauss         errmsg = "duplicate Host header -> 400";
381a3b76ed5SGlenn Strauss         break;
382a3b76ed5SGlenn Strauss       case HTTP_HEADER_CONTENT_TYPE:
383a3b76ed5SGlenn Strauss         errmsg = "duplicate Content-Type header -> 400";
384a3b76ed5SGlenn Strauss         break;
385a3b76ed5SGlenn Strauss       case HTTP_HEADER_IF_MODIFIED_SINCE:
386a3b76ed5SGlenn Strauss         errmsg = "duplicate If-Modified-Since header -> 400";
387a3b76ed5SGlenn Strauss         break;
388e78cd765SGlenn Strauss       case HTTP_HEADER_HTTP2_SETTINGS:
389e78cd765SGlenn Strauss         errmsg = "duplicate HTTP2-Settings header -> 400";
390e78cd765SGlenn Strauss         break;
391a3b76ed5SGlenn Strauss       default:
392a3b76ed5SGlenn Strauss         errmsg = "duplicate header -> 400";
393a3b76ed5SGlenn Strauss         break;
394e78cd765SGlenn Strauss       case HTTP_HEADER_IF_NONE_MATCH:
395e78cd765SGlenn Strauss         /* if dup, only the first one will survive */
396e78cd765SGlenn Strauss         return 0; /* ignore header */
397a3b76ed5SGlenn Strauss     }
398a3b76ed5SGlenn Strauss     return http_request_header_line_invalid(r, 400, errmsg);
399a3b76ed5SGlenn Strauss }
400a3b76ed5SGlenn Strauss 
401a3b76ed5SGlenn Strauss 
4023dd3cde9SGlenn Strauss /* add header to list of headers
4033dd3cde9SGlenn Strauss  * certain headers are also parsed
4043dd3cde9SGlenn Strauss  * might drop a header if deemed unnecessary/broken
405ed0054c2SStefan Bühler  *
406e5d61e9aSGlenn Strauss  * returns 0 on success, HTTP status on error
407ed0054c2SStefan Bühler  */
http_request_parse_single_header(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)4089914bb29SGlenn Strauss static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
4093dd3cde9SGlenn Strauss     /*
4103dd3cde9SGlenn Strauss      * Note: k might not be '\0'-terminated
4115547530aSGlenn Strauss      * Note: v is not '\0'-terminated
4125547530aSGlenn Strauss      *   With lighttpd HTTP/1.1 parser, v ends with whitespace
41363e32e81SGlenn Strauss      *     (one of '\r' '\n' ' ' '\t')
4145547530aSGlenn Strauss      *   With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
4155547530aSGlenn Strauss      *     (care must be taken to avoid libc funcs which expect z-strings)
416ed0054c2SStefan Bühler      */
4175547530aSGlenn Strauss     /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
418ed0054c2SStefan Bühler 
4193dd3cde9SGlenn Strauss     switch (id) {
4203dd3cde9SGlenn Strauss       /*case HTTP_HEADER_OTHER:*/
4213dd3cde9SGlenn Strauss       default:
422ed0054c2SStefan Bühler         break;
4233dd3cde9SGlenn Strauss       case HTTP_HEADER_HOST:
4249c8981a7SGlenn Strauss         if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
425cef6ee67SGlenn Strauss             if (vlen >= 1024) { /*(expecting < 256)*/
4266870b0f5SGlenn Strauss                 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
427cef6ee67SGlenn Strauss             }
4288c5acaaeSGlenn Strauss             /*(http_request_header_append() plus sets r->http_host)*/
4298c5acaaeSGlenn Strauss             http_request_header_set_Host(r, v, vlen);
4308c5acaaeSGlenn Strauss             return 0;
4313dd3cde9SGlenn Strauss         }
4326870b0f5SGlenn Strauss         else if (NULL != r->http_host
433a3b76ed5SGlenn Strauss                  && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
434fa4ab192SGlenn Strauss             /* ignore all Host: headers if match authority in request line */
4358c5acaaeSGlenn Strauss             /* (expect Host to match case in :authority of HTTP/2 request) */
436e5d61e9aSGlenn Strauss             return 0; /* ignore header */
4373dd3cde9SGlenn Strauss         }
438e78cd765SGlenn Strauss         /* else parse duplicate for match or error */
439e78cd765SGlenn Strauss         __attribute_fallthrough__
440e78cd765SGlenn Strauss       case HTTP_HEADER_IF_MODIFIED_SINCE:
441e78cd765SGlenn Strauss       case HTTP_HEADER_IF_NONE_MATCH:
442e78cd765SGlenn Strauss       case HTTP_HEADER_CONTENT_TYPE:
443e78cd765SGlenn Strauss       case HTTP_HEADER_HTTP2_SETTINGS:
444e78cd765SGlenn Strauss         if (light_btst(r->rqst_htags, id))
445a3b76ed5SGlenn Strauss             return http_request_parse_duplicate(r, id, k, klen, v, vlen);
4463dd3cde9SGlenn Strauss         break;
4473dd3cde9SGlenn Strauss       case HTTP_HEADER_CONNECTION:
448e2b4c309SGlenn Strauss         /* "Connection: close" is common case if header is present */
449e2b4c309SGlenn Strauss         if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
450e2b4c309SGlenn Strauss             || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
4516870b0f5SGlenn Strauss             r->keep_alive = 0;
4523dd3cde9SGlenn Strauss             break;
4533dd3cde9SGlenn Strauss         }
454e2b4c309SGlenn Strauss         if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
4556870b0f5SGlenn Strauss             r->keep_alive = 1;
456e2b4c309SGlenn Strauss             break;
4573dd3cde9SGlenn Strauss         }
4583dd3cde9SGlenn Strauss         break;
4593dd3cde9SGlenn Strauss       case HTTP_HEADER_CONTENT_LENGTH:
4609c8981a7SGlenn Strauss         if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
46163e32e81SGlenn Strauss             /*(trailing whitespace was removed from vlen)*/
4625547530aSGlenn Strauss             /*(not using strtoll() since v might not be z-string)*/
4635547530aSGlenn Strauss             const char *err;
4645547530aSGlenn Strauss             off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
4655547530aSGlenn Strauss             if (err == v+vlen) {
4666870b0f5SGlenn Strauss                 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
467af5df352SGlenn Strauss                 if (0 == r->reqbody_length) r->reqbody_length = clen;
4683dd3cde9SGlenn Strauss             }
4693dd3cde9SGlenn Strauss             else {
4706870b0f5SGlenn Strauss                 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
4713dd3cde9SGlenn Strauss             }
4723dd3cde9SGlenn Strauss         }
4733dd3cde9SGlenn Strauss         else {
4746870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
4753dd3cde9SGlenn Strauss         }
4763dd3cde9SGlenn Strauss         break;
477fa4ab192SGlenn Strauss       case HTTP_HEADER_TRANSFER_ENCODING:
4788d7e9439SGlenn Strauss         if (HTTP_VERSION_1_1 != r->http_version) {
4798d7e9439SGlenn Strauss             return http_request_header_line_invalid(r, 400,
4808d7e9439SGlenn Strauss               HTTP_VERSION_1_0 == r->http_version
4818d7e9439SGlenn Strauss                 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
4828d7e9439SGlenn Strauss                 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
483fa4ab192SGlenn Strauss         }
484fa4ab192SGlenn Strauss 
485fa4ab192SGlenn Strauss         if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
486fa4ab192SGlenn Strauss             /* Transfer-Encoding might contain additional encodings,
487fa4ab192SGlenn Strauss              * which are not currently supported by lighttpd */
4886870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
489fa4ab192SGlenn Strauss         }
490af5df352SGlenn Strauss         r->reqbody_length = -1;
491fa4ab192SGlenn Strauss 
492fa4ab192SGlenn Strauss         /* Transfer-Encoding is a hop-by-hop header,
493fa4ab192SGlenn Strauss          * which must not be blindly forwarded to backends */
494fa4ab192SGlenn Strauss         return 0; /* skip header */
495ed0054c2SStefan Bühler     }
496ed0054c2SStefan Bühler 
4977c7f8c46SGlenn Strauss     http_header_request_append(r, id, k, klen, v, vlen);
498e5d61e9aSGlenn Strauss     return 0;
499ed0054c2SStefan Bühler }
500ed0054c2SStefan Bühler 
50163e32e81SGlenn Strauss __attribute_cold__
http_request_parse_proto_loose(request_st * const restrict r,const char * const restrict ptr,const size_t len,const unsigned int http_parseopts)5029914bb29SGlenn Strauss static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
50363e32e81SGlenn Strauss     const char * proto = memchr(ptr, ' ', len);
50463e32e81SGlenn Strauss     if (NULL == proto)
5056870b0f5SGlenn Strauss         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
50663e32e81SGlenn Strauss     proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
50763e32e81SGlenn Strauss     if (NULL == proto)
5086870b0f5SGlenn Strauss         return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
50963e32e81SGlenn Strauss     ++proto;
510bcdc6a3bSJan Kneschke 
51163e32e81SGlenn Strauss     if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
51263e32e81SGlenn Strauss         if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
51363e32e81SGlenn Strauss             /* length already checked before calling this routine */
51463e32e81SGlenn Strauss             /* (len != (size_t)(proto - ptr + 8)) */
515af3313bfSGlenn Strauss             if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
5166870b0f5SGlenn Strauss                 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
5176870b0f5SGlenn Strauss             r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
51863e32e81SGlenn Strauss         }
51963e32e81SGlenn Strauss         else
5206870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
52163e32e81SGlenn Strauss     }
52263e32e81SGlenn Strauss     else
5236870b0f5SGlenn Strauss         return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
524bcdc6a3bSJan Kneschke 
52563e32e81SGlenn Strauss     /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
5266870b0f5SGlenn Strauss     r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
527b9a37291SGlenn Strauss 
52863e32e81SGlenn Strauss     return 0;
52963e32e81SGlenn Strauss }
53063e32e81SGlenn Strauss 
53163e32e81SGlenn Strauss __attribute_cold__
http_request_parse_reqline_uri(request_st * const restrict r,const char * const restrict uri,const size_t len,const unsigned int http_parseopts)532d013d0abSGlenn Strauss static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
53363e32e81SGlenn Strauss     const char *nuri;
53463e32e81SGlenn Strauss     if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
53563e32e81SGlenn Strauss         && NULL != (nuri = memchr(uri + 7, '/', len-7)))
53663e32e81SGlenn Strauss        ||
53763e32e81SGlenn Strauss        (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
53863e32e81SGlenn Strauss         && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
53963e32e81SGlenn Strauss         const char * const host = uri + (uri[4] == ':' ? 7 : 8);
54063e32e81SGlenn Strauss         const size_t hostlen = nuri - host;
54163e32e81SGlenn Strauss         if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
5426870b0f5SGlenn Strauss             http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
54363e32e81SGlenn Strauss             return NULL;
54463e32e81SGlenn Strauss         }
5458c5acaaeSGlenn Strauss         /* Insert as "Host" header */
5468c5acaaeSGlenn Strauss         http_request_header_set_Host(r, host, hostlen);
54763e32e81SGlenn Strauss         return nuri;
548af3313bfSGlenn Strauss     } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
5496870b0f5SGlenn Strauss            || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
5506870b0f5SGlenn Strauss            || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
55163e32e81SGlenn Strauss         /* (permitted) */
55263e32e81SGlenn Strauss         return uri;
55363e32e81SGlenn Strauss     } else {
5546870b0f5SGlenn Strauss         http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
55563e32e81SGlenn Strauss         return NULL;
55663e32e81SGlenn Strauss     }
55763e32e81SGlenn Strauss }
55863e32e81SGlenn Strauss 
559f37c90bcSGlenn Strauss 
/* Forward declaration: check of the remainder of a header field-name.
 * http_request_parse_header() calls this with the portion of the key that
 * starts at the first character which is neither a lowercase letter nor
 * '-', and treats any nonzero return value as a rejection (400). */
__attribute_cold__
__attribute_noinline__
static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
563f37c90bcSGlenn Strauss 
564f37c90bcSGlenn Strauss 
5658fc8ab89SGlenn Strauss int
http_request_validate_pseudohdrs(request_st * const restrict r,const int scheme,const unsigned int http_parseopts)5668fc8ab89SGlenn Strauss http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
5678fc8ab89SGlenn Strauss {
568f37c90bcSGlenn Strauss     /* :method is required to indicate method
569f37c90bcSGlenn Strauss      * CONNECT method must have :method and :authority
5705d1aa5d0SGlenn Strauss      *   unless RFC8441 CONNECT extension, which must follow 'other' (below)
571f37c90bcSGlenn Strauss      * All other methods must have at least :method :scheme :path */
572f37c90bcSGlenn Strauss 
573f37c90bcSGlenn Strauss     if (HTTP_METHOD_UNSET == r->http_method)
5748fc8ab89SGlenn Strauss         return http_request_header_line_invalid(r, 400,
5758fc8ab89SGlenn Strauss           "missing pseudo-header method -> 400");
576f37c90bcSGlenn Strauss 
5775d1aa5d0SGlenn Strauss     if (HTTP_METHOD_CONNECT != r->http_method)
5785d1aa5d0SGlenn Strauss         r->h2_connect_ext = 0;
5795d1aa5d0SGlenn Strauss 
5805d1aa5d0SGlenn Strauss     if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)
5815d1aa5d0SGlenn Strauss         || __builtin_expect( (r->h2_connect_ext != 0), 0)) {
5825d1aa5d0SGlenn Strauss 
583f37c90bcSGlenn Strauss         if (!scheme)
5848fc8ab89SGlenn Strauss             return http_request_header_line_invalid(r, 400,
5858fc8ab89SGlenn Strauss               "missing pseudo-header scheme -> 400");
586f37c90bcSGlenn Strauss 
587af3df29aSGlenn Strauss         if (buffer_is_blank(&r->target))
5888fc8ab89SGlenn Strauss             return http_request_header_line_invalid(r, 400,
5898fc8ab89SGlenn Strauss               "missing pseudo-header path -> 400");
590f37c90bcSGlenn Strauss 
5918fc8ab89SGlenn Strauss         const char * const uri = r->target.ptr;
592f37c90bcSGlenn Strauss         if (*uri != '/') { /* (common case: (*uri == '/')) */
5938fc8ab89SGlenn Strauss             if (uri[0] != '*' || uri[1] != '\0'
5948fc8ab89SGlenn Strauss                 || HTTP_METHOD_OPTIONS != r->http_method)
5958fc8ab89SGlenn Strauss                 return http_request_header_line_invalid(r, 400,
5968fc8ab89SGlenn Strauss                   "invalid pseudo-header path -> 400");
597f37c90bcSGlenn Strauss         }
598f37c90bcSGlenn Strauss     }
599f37c90bcSGlenn Strauss     else { /* HTTP_METHOD_CONNECT */
600f37c90bcSGlenn Strauss         if (NULL == r->http_host)
6018fc8ab89SGlenn Strauss             return http_request_header_line_invalid(r, 400,
6028fc8ab89SGlenn Strauss               "missing pseudo-header authority -> 400");
603af3df29aSGlenn Strauss         if (!buffer_is_blank(&r->target) || scheme)
6048fc8ab89SGlenn Strauss             return http_request_header_line_invalid(r, 400,
6058fc8ab89SGlenn Strauss               "invalid pseudo-header with CONNECT -> 400");
6068c5acaaeSGlenn Strauss         /* note: this copy occurs prior to http_request_host_policy()
6078c5acaaeSGlenn Strauss          * so any consumer handling CONNECT should normalize r->target
6088c5acaaeSGlenn Strauss          * as appropriate */
6098fc8ab89SGlenn Strauss         buffer_copy_buffer(&r->target, r->http_host);
610f37c90bcSGlenn Strauss     }
6118fc8ab89SGlenn Strauss     buffer_copy_buffer(&r->target_orig, &r->target);
612f37c90bcSGlenn Strauss 
613f37c90bcSGlenn Strauss     /* r->http_host, if set, is checked with http_request_host_policy()
614f37c90bcSGlenn Strauss      * in http_request_parse() */
615f37c90bcSGlenn Strauss 
6168fc8ab89SGlenn Strauss     /* copied and modified from end of http_request_parse_reqline() */
617f37c90bcSGlenn Strauss 
618f37c90bcSGlenn Strauss     /* check uri for invalid characters */
619438daddeSGlenn Strauss     const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/
620438daddeSGlenn Strauss     const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
621438daddeSGlenn Strauss       ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
622438daddeSGlenn Strauss           ? NULL /* URI will be checked in http_request_parse_target() */
623438daddeSGlenn Strauss           : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len)
624438daddeSGlenn Strauss       : http_request_check_line_minimal(r->target.ptr, len);
625438daddeSGlenn Strauss     return (NULL == x)
626438daddeSGlenn Strauss       ? 0
627438daddeSGlenn Strauss       : http_request_header_char_invalid(r, *x,
6288fc8ab89SGlenn Strauss           "invalid character in URI -> 400");
629f37c90bcSGlenn Strauss }
630f37c90bcSGlenn Strauss 
6318fc8ab89SGlenn Strauss 
/* Process one decoded header field (hpctx->k / hpctx->v) of an HTTP/2
 * request header block or trailer block.
 *
 * Pseudo-headers (keys beginning with ':') are accepted only before the
 * first regular header and are applied directly to the request (method,
 * target, Host, scheme-present flag, RFC8441 :protocol flag).  The first
 * regular header triggers http_request_validate_pseudohdrs().  Regular
 * headers have their value checked for invalid characters and trimmed of
 * leading/trailing spaces and tabs, their key checked, and are then passed
 * to http_request_parse_single_header().  In trailers, pseudo-headers are
 * rejected and all other fields are ignored.
 *
 * r      request being populated
 * hpctx  per-header-block parse state; hlen, id, pseudo and scheme are
 *        updated here
 *
 * Returns 0 if the field was accepted or ignored.  Otherwise returns a
 * nonzero value: 431 and 400 are returned directly on some paths; other
 * error paths return the result of http_request_header_line_invalid() or
 * http_request_header_char_invalid() (presumably the HTTP status passed
 * in -- confirm against their definitions). */
int
http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
{
    /* Note: k and v might not be '\0' terminated strings;
     * care must be taken to avoid libc funcs which expect z-strings */
    const char * const restrict k = hpctx->k;
    const char * restrict v = hpctx->v;
    const uint32_t klen = hpctx->klen;
    uint32_t vlen = hpctx->vlen;

    /* (klen is known to be nonzero below, so k[0] and klen-1 are safe) */
    if (0 == klen)
        return http_request_header_line_invalid(r, 400,
          "invalid header key -> 400");

    /* running total of header sizes across this header block
     * (the +4 presumably accounts for ": " and "\r\n" per field) */
    if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
        /*(configurable with server.max-request-field-size; default 8k)*/
      #if 1 /* emit to error log for people sending large headers */
        log_error(r->conf.errh, __FILE__, __LINE__,
                  "oversized request header -> 431");
        return 431; /* Request Header Fields Too Large */
      #else
        /* 431 Request Header Fields Too Large */
        return http_request_header_line_invalid(r, 431,
          "oversized request header -> 431");
      #endif
    }

    if (!hpctx->trailers) {
        if (*k == ':') {
            /* HTTP/2 request pseudo-header fields */
            if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
                return http_request_header_line_invalid(r, 400,
                  "invalid pseudo-header -> 400");
            if (0 == vlen)
                return http_request_header_line_invalid(r, 400,
                  "invalid header value -> 400");

            /* (note: relies on implementation details using ls-hpack in h2.c)
             * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
             *  matches key, not also value, if lsxpack_header_t flags does not
             *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
             *  below indicates any method, not only "GET") */
            if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
                /* id not provided by decoder: identify pseudo-header by name
                 * (switch on length of name following the leading ':') */
                switch (klen-1) {
                  case 4:
                    if (0 == memcmp(k+1, "path", 4))
                        hpctx->id = HTTP_HEADER_H2_PATH;
                    break;
                  case 6:
                    if (0 == memcmp(k+1, "method", 6))
                        hpctx->id = HTTP_HEADER_H2_METHOD_GET;
                    else if (0 == memcmp(k+1, "scheme", 6))
                        hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
                    break;
                  case 8:
                    if (0 == memcmp(k+1, "protocol", 8))
                        hpctx->id = HTTP_HEADER_H2_PROTOCOL;
                    break;
                  case 9:
                    if (0 == memcmp(k+1, "authority", 9))
                        hpctx->id = HTTP_HEADER_H2_AUTHORITY;
                    break;
                  default:
                    break;
                }
                /* id left unchanged above: name is not a recognized
                 * request pseudo-header */
                if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
                    return http_request_header_line_invalid(r, 400,
                      "invalid pseudo-header -> 400");
            }

            /* each case returns 0 on first occurrence; 'break' is taken only
             * when the corresponding value has already been set, which falls
             * through to the "repeated pseudo-header" error after the switch */
            switch (hpctx->id) {
              case HTTP_HEADER_H2_AUTHORITY:
                if (__builtin_expect( (r->http_host != NULL), 0))
                    break;
                if (vlen >= 1024) /*(expecting < 256)*/
                    return http_request_header_line_invalid(r, 400,
                      "invalid pseudo-header authority too long -> 400");
                /* insert as "Host" header */
                http_request_header_set_Host(r, v, vlen);
                return 0;
              case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
              case HTTP_HEADER_H2_METHOD_POST:
                if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
                    break;
                r->http_method = get_http_method_key(v, vlen);
                if (HTTP_METHOD_UNSET >= r->http_method)
                    return http_request_header_line_invalid(r, 501,
                      "unknown http-method -> 501");
                return 0;
              case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
              case HTTP_HEADER_H2_PATH_INDEX_HTML:
                if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
                    break;
                buffer_copy_string_len(&r->target, v, vlen);
                return 0;
              case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
              case HTTP_HEADER_H2_SCHEME_HTTPS:
                if (__builtin_expect( (hpctx->scheme), 0))
                    break;
                hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
                return 0;
               #if 0
                switch (vlen) {/*(validated, but then ignored)*/
                  case 5: /* "https" */
                    if (v[4]!='s') break;
                    __attribute_fallthrough__
                  case 4: /* "http" */
                    if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
                        hpctx->scheme = 1;
                        return 0;
                    }
                    break;
                  default:
                    break;
                }
                return http_request_header_line_invalid(r, 400,
                  "unknown pseudo-header scheme -> 400");
               #endif
              case HTTP_HEADER_H2_PROTOCOL:
                /* support only ":protocol: websocket" for now */
                if (vlen != 9 || 0 != memcmp(v, "websocket", 9))
                    return http_request_header_line_invalid(r, 405,
                      "unhandled :protocol value -> 405");
                /*(future: might be enum of recognized :protocol: ext values)*/
                r->h2_connect_ext = 1;
                return 0;
              default:
                return http_request_header_line_invalid(r, 400,
                  "invalid pseudo-header -> 400");
            }
            return http_request_header_line_invalid(r, 400,
              "repeated pseudo-header -> 400");
        }
        else { /*(non-pseudo headers)*/
            if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
                hpctx->pseudo = 0;
                int status =
                  http_request_validate_pseudohdrs(r, hpctx->scheme,
                                                   hpctx->http_parseopts);
                if (0 != status) return status;
            }
            /* header with empty value is silently dropped
             * (after the pseudo-header validation above has run) */
            if (0 == vlen)
                return 0;

            const unsigned int http_header_strict =
              (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);

            const char * const x = (http_header_strict)
              ? http_request_check_line_strict(v, vlen)
              : http_request_check_line_minimal(v, vlen);
            if (x)
                return http_request_header_char_invalid(r, *x,
                  "invalid character in header -> 400");

            /* remove leading and trailing whitespace (strict RFC conformance)*/
            /* (the <= 0x20 tests are cheap pre-filters for the uncommon case;
             *  the loops test for ' ' and '\t' exactly) */
            if (__builtin_expect( (*v <= 0x20), 0)) {
                /* advance v and shrink vlen; stops when vlen reaches 0 */
                while ((*v == ' ' || *v == '\t') && (++v, --vlen)) ;
                if (0 == vlen)
                    return 0;
            }
            /* here vlen > 0 and v[0] is neither ' ' nor '\t', so the backward
             * scan below stops at v[0] at the latest */
            if (__builtin_expect( (v[vlen-1] <= 0x20), 0)) {
                while (v[vlen-1] == ' ' || v[vlen-1] == '\t') --vlen;
            }

            if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
                /* id not provided by decoder: validate key and look up id.
                 * scan past the leading run of lowercase letters and '-' */
                uint32_t j = 0;
                while (j < klen && (light_islower(k[j]) || k[j] == '-'))
                    ++j;

                if (__builtin_expect( (j != klen), 0)) {
                    /* uppercase letter in key is rejected outright */
                    if (light_isupper(k[j]))
                        return 400;
                    /* remainder of key is checked by helper */
                    if (0 != http_request_parse_header_other(r, k+j, klen-j,
                                                            http_header_strict))
                        return 400;
                }

                hpctx->id = http_header_hkey_get_lc(k, klen);
            }

            const enum http_header_e id = (enum http_header_e)hpctx->id;

            /* with HTTP/2, the only TE value accepted is "trailers" */
            if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
                && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
                return http_request_header_line_invalid(r, 400,
                  "invalid TE header value with HTTP/2 -> 400");

            return http_request_parse_single_header(r, id, k, klen, v, vlen);
        }
    }
    else { /*(trailers)*/
        if (*k == ':')
            return http_request_header_line_invalid(r, 400,
              "invalid pseudo-header in trailers -> 400");
        /* ignore trailers (after required HPACK decoding) if streaming
         * request body to backend since headers have already been sent
         * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
         * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
         * support using HTTP/2 to connect to backends) */
      #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
        if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
            return 0;
      #endif
        /* Note: do not unconditionally merge into headers since if
         * headers had already been sent to backend, then mod_accesslog
         * logging of request headers might be inaccurate.
         * Many simple backends do not support HTTP/1.1 requests sending
         * Transfer-Encoding: chunked, and even those that do might not
         * handle trailers.  Some backends do not even support HTTP/1.1.
         * For all these reasons, ignore trailers if streaming request
         * body to backend.  Revisit in future if adding support for
         * connecting to backends using HTTP/2 (with explicit config
         * option to force connecting to backends using HTTP/2) */

        /* XXX: TODO: request trailers not handled if streaming reqbody
         * XXX: must ensure that trailers are not disallowed field-names
         */

      #if 0
        if (0 == vlen)
            return 0;
      #endif

        /* (as written, every non-pseudo trailer field is ignored) */
        return 0;
    }
}
8588fc8ab89SGlenn Strauss 
8598fc8ab89SGlenn Strauss 
/* Parse the HTTP/1.x request line "<method> <uri> <protocol>\r\n".
 *
 * On success sets r->http_version, r->keep_alive, r->http_method, r->target
 * and r->target_orig (and, for an absolute-form URI, the Host header via
 * http_request_parse_reqline_uri()).
 *
 * r              request being populated
 * ptr            start of the request header block; request line is first
 * hoff           header line offsets: hoff[2] is used as the length of the
 *                request line including its line ending, and hoff[hoff[0]]
 *                as the length of the entire header block
 * http_parseopts HTTP_PARSEOPT_* flags
 *
 * Returns 0 on success.  On failure returns a nonzero error result (400
 * directly when the URI helper fails, otherwise the result of
 * http_request_header_line_invalid() / http_request_header_char_invalid()),
 * in which case r->target and r->target_orig are not set by this call. */
static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    size_t len = hoff[2];

    /* parse the first line of the request
     * <method> <uri> <protocol>\r\n
     * */
    if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
        return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
    /* exclude line ending from len: "\r\n", or bare "\n" if not strict */
    if (ptr[len-2] == '\r')
        len-=2;
    else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
        len-=1;
    else
        return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");

    /*
     * RFC7230:
     *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
     *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
     */

    /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
    /* (fast path: compare final 8 bytes of line as a single 64-bit value;
     *  bytes are copied individually into the union, so no alignment
     *  requirement is placed on ptr) */
    union proto_un {
      char c[8];
      uint64_t u;
    };
    static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
    static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
    const char *p = ptr + len - 8;
    union proto_un proto8;
    proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
    proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
    if (p[-1] == ' ' && http_1_1.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_1;
        r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
    }
    else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_0;
        r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
    }
    else {
        /* slow path: lenient search for protocol after second space */
        int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
        if (0 != status) return status;
        /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
        /* scan backward so that p[-1] is the space preceding the protocol */
        for (p = ptr + len - 9; p[-1] != ' '; --p) ;
    }

    /* method is expected to be a short string in the general case */
    size_t i = 0;
    while (ptr[i] != ' ') ++i;
  #if 0 /*(space must exist if protocol was parsed successfully)*/
    while (i < len && ptr[i] != ' ') ++i;
    if (ptr[i] != ' ')
        return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
  #endif

    r->http_method = get_http_method_key(ptr, i);
    if (HTTP_METHOD_UNSET >= r->http_method)
        return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");

    const char *uri = ptr + i + 1;

    /* (uri == p when only a single space separates method and protocol) */
    if (uri == p)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
    len = (size_t)(p - uri - 1);

    if (*uri != '/') { /* (common case: (*uri == '/')) */
        uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
        if (NULL == uri) return 400;
        /* uri may have been advanced past "scheme://authority" */
        len = (size_t)(p - uri - 1);
    }

    if (0 == len)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");

    /* check uri for invalid characters */     /* http_header_strict */
    const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
      ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
          ? NULL /* URI will be checked in http_request_parse_target() */
          : http_request_check_uri_strict((const uint8_t *)uri, len)
      : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */
    /* reject the request: the error result must be returned to the caller
     * so that a target containing an invalid character is never stored */
    if (x)
        return http_request_header_char_invalid(r, *x, "invalid character in URI -> 400");

    buffer_copy_string_len(&r->target, uri, len);
    buffer_copy_string_len(&r->target_orig, uri, len);
    return 0;
}
948c8159ee5SGlenn Strauss 
/**
 * Split r->target into r->uri.scheme, r->uri.path and r->uri.query.
 *
 * The URI is parsed into components at the start of a request and may be
 * parsed again upon HANDLER_COMEBACK during the request.
 * r->target is expected to be "/url-part?query-part" (and *not* a
 * fully-qualified URI starting https://...); r->uri.authority is expected
 * to be filled in elsewhere from r->http_host.
 *
 * Naming follows RFC 2396:
 *   (scheme)://(authority)(path)?(query)#fragment
 *
 * @param r            request whose target is split (r->target may be
 *                     modified in place: normalized or fragment removed)
 * @param scheme_port  443 selects scheme "https"; any other value "http"
 * @return 0 on success, else the value returned by
 *         http_request_header_line_invalid() for a 400 Bad Request
 */
int http_request_parse_target(request_st * const r, int scheme_port) {
    /* initial scheme comes from connection-level state
     * (r->uri.scheme can be overwritten later,
     *  for example by mod_extforward or mod_magnet) */
    if (443 == scheme_port)
        buffer_copy_string_len(&r->uri.scheme, CONST_STR_LEN("https"));
    else
        buffer_copy_string_len(&r->uri.scheme, CONST_STR_LEN("http"));

    buffer * const target = &r->target;

    /* CONNECT ... (or) OPTIONS * ...
     * target is taken verbatim as the path and there is no query */
    const int is_connect =
      (HTTP_METHOD_CONNECT == r->http_method && !r->h2_connect_ext);
    const int is_options_star =
      (HTTP_METHOD_OPTIONS == r->http_method
       && '*' == target->ptr[0]
       && '\0' == target->ptr[1]);
    if (is_connect || is_options_star) {
        buffer_copy_buffer(&r->uri.path, target);
        buffer_clear(&r->uri.query);
        return 0;
    }

    /* locate the character preceding the query-part (NULL if no query) */
    const char *qmark;
    if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
        const int qs =
          burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
        if (-2 == qs)
            return http_request_header_line_invalid(r, 400,
              "invalid character in URI -> 400"); /* Bad Request */
        qmark = (-1 != qs) ? target->ptr + qs : NULL;
        /* future: might recalculate r->rqst_header_len here (or below for
         * all requests) using the normalized target length, and respond
         * 414 URI Too Long or 431 Request Header Fields Too Large.
         * (Note: total header size is not recalculated on HANDLER_COMEBACK
         *  even if other request headers changed during processing) */
    }
    else {
        size_t tlen = buffer_clen(target);
        const char * const frag = memchr(target->ptr, '#', tlen);
        if (NULL != frag) { /* discard fragment */
            tlen = (size_t)(frag - target->ptr);
            buffer_truncate(target, tlen);
        }
        qmark = memchr(target->ptr, '?', tlen);
    }

    /* copy out query-part (if any) and (still encoded) path-part */
    const char * const path = target->ptr;
    const uint32_t tlen = buffer_clen(target);
    const uint32_t plen = (NULL != qmark) ? (uint32_t)(qmark - path) : tlen;
    if (NULL != qmark)
        buffer_copy_string_len(&r->uri.query, qmark + 1, tlen - plen - 1);
    else
        buffer_clear(&r->uri.query);
    buffer_copy_string_len(&r->uri.path, path, plen);

    /* decode url to path
     * - decode url-encodings  (e.g. %20 -> ' ')
     * - remove path-modifiers (e.g. /../)
     */
    buffer_urldecode_path(&r->uri.path);
    buffer_path_simplify(&r->uri.path);

    if ('/' == r->uri.path.ptr[0])
        return 0;

    return http_request_header_line_invalid(r, 400,
      "uri-path does not begin with '/' -> 400"); /* Bad Request */
}
1058d013d0abSGlenn Strauss 
1059d5878718SGlenn Strauss __attribute_cold__
10606f803af0SGlenn Strauss __attribute_noinline__
http_request_parse_header_other(request_st * const restrict r,const char * const restrict k,const int klen,const unsigned int http_header_strict)10619914bb29SGlenn Strauss static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
10626f803af0SGlenn Strauss     for (int i = 0; i < klen; ++i) {
10636f803af0SGlenn Strauss         if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1064bcdc6a3bSJan Kneschke         /**
1065bcdc6a3bSJan Kneschke          * 1*<any CHAR except CTLs or separators>
1066b5da12c0SStefan Bühler          * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1067bcdc6a3bSJan Kneschke          *
1068bcdc6a3bSJan Kneschke          */
10696f803af0SGlenn Strauss         switch(k[i]) {
107028d6015bSGlenn Strauss         case ' ':
107128d6015bSGlenn Strauss         case '\t':
10726870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
107392f2ac9bSGlenn Strauss         case '\r':
107492f2ac9bSGlenn Strauss         case '\n':
1075bcdc6a3bSJan Kneschke         case '(':
1076bcdc6a3bSJan Kneschke         case ')':
1077bcdc6a3bSJan Kneschke         case '<':
1078bcdc6a3bSJan Kneschke         case '>':
1079bcdc6a3bSJan Kneschke         case '@':
1080bcdc6a3bSJan Kneschke         case ',':
108192f2ac9bSGlenn Strauss         case ':':
1082bcdc6a3bSJan Kneschke         case ';':
1083bcdc6a3bSJan Kneschke         case '\\':
1084bcdc6a3bSJan Kneschke         case '\"':
1085bcdc6a3bSJan Kneschke         case '/':
1086bcdc6a3bSJan Kneschke         case '[':
1087bcdc6a3bSJan Kneschke         case ']':
1088bcdc6a3bSJan Kneschke         case '?':
1089bcdc6a3bSJan Kneschke         case '=':
1090bcdc6a3bSJan Kneschke         case '{':
1091bcdc6a3bSJan Kneschke         case '}':
10926870b0f5SGlenn Strauss             return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1093b5da12c0SStefan Bühler         default:
10946f803af0SGlenn Strauss             if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
10956870b0f5SGlenn Strauss                 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
10966f803af0SGlenn Strauss             break; /* ok */
1097b5da12c0SStefan Bühler         }
1098bcdc6a3bSJan Kneschke     }
10996f803af0SGlenn Strauss     return 0;
1100bcdc6a3bSJan Kneschke }
11016f803af0SGlenn Strauss 
/**
 * Parse the header lines which follow the request-line.
 *
 * @param r               request receiving the parsed headers
 * @param ptr             start of the request header block; modified in
 *                        place only to undo line folding (the line ending
 *                        before a continuation line is replaced by spaces)
 * @param hoff            line offsets into ptr: hoff[0] is the index of the
 *                        last offset, header lines begin at hoff[2], and
 *                        hoff[hoff[0]] is the offset of the final "\r\n"
 * @param http_parseopts  HTTP_PARSEOPT_* flags; HTTP_PARSEOPT_HEADER_STRICT
 *                        enables the strict checks below
 * @return 0 on success, else a non-zero HTTP status for the error response
 */
static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    const unsigned int strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
    const int nlines = hoff[0];

    /*(WS at the start of the first header line is not checked here;
     * will later result in invalid label for HTTP header)*/

    for (int i = 2; i < nlines; ++i) {
        const char * const key = ptr + hoff[i];
        /* one past last line hoff[hoff[0]] is to final "\r\n" */
        char *eol = ptr + hoff[i+1];

        const char *sep = memchr(key, ':', (size_t)(eol - key));
        if (NULL == sep)
            return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");

        const char *val = sep + 1;

        /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
         * 3.2.4.  Field Parsing
         * [...]
         * No whitespace is allowed between the header field-name and colon.  In
         * the past, differences in the handling of such whitespace have led to
         * security vulnerabilities in request routing and response handling.  A
         * server MUST reject any received request message that contains
         * whitespace between a header field-name and colon with a response code
         * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
         * response message before forwarding the message downstream.
         */
        /* (line key[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check sep != key) */
        while (sep[-1] == ' ' || sep[-1] == '\t') {
            if (strict)
                return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
            --sep; /* remove trailing whitespace from key (if !strict) */
        }

        const int klen = (int)(sep - key);
        if (0 == klen)
            return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
        const enum http_header_e id = http_header_hkey_get(key, klen);

        if (HTTP_HEADER_OTHER == id) {
            /* skip leading alpha and '-' (common cases); hand the
             * remainder of the key (if any) to the detailed check */
            int j = 0;
            while (j < klen && (key[j] == '-' || light_isalpha(key[j]))) ++j;
            if (j < klen
                && 0 != http_request_parse_header_other(r, key+j, klen-j, strict))
                return 400;
        }

        /* remove leading whitespace from value */
        while (*val == ' ' || *val == '\t') ++val;

        /* line folding: while the following line begins with whitespace,
         * overwrite the line ending before it with spaces and advance i
         * so that the continuation becomes part of this header value */
        while (i < nlines) {
            eol = ptr + hoff[i+1];
            if (!(eol[0] == ' ' || eol[0] == '\t')) break;

          #ifdef __COVERITY__
            force_assert(eol - key >= 2);
          #endif
            if (eol[-2] == '\r')
                eol[-2] = ' ';
            else if (strict)
                return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
            eol[-1] = ' ';
            ++i;
        }
      #ifdef __COVERITY__
        /*(buf holding key has non-zero request-line, so eol[-2] valid)*/
        force_assert(eol >= key + 2);
      #endif
        if (eol[-2] == '\r')
            --eol;
        else if (strict)
            return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
        /* remove trailing whitespace from value (+ remove '\r\n') */
        /* (line key[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check (eol != key)) */
        --eol;
        while (eol[-1] == ' ' || eol[-1] == '\t') --eol;

        const int vlen = (int)(eol - val);
        /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
        if (vlen <= 0) continue; /* ignore header */

        if (strict) {
            const char * const bad = http_request_check_line_strict(val, vlen);
            if (bad)
                return http_request_header_char_invalid(r, *bad,
                  "invalid character in header -> 400");
        } /* else already checked in http_request_parse_reqline() for any '\0' */

        const int status = http_request_parse_single_header(r, id, key, (size_t)klen, val, (size_t)vlen);
        if (0 != status) return status;
    }

    return 0;
}
12106f803af0SGlenn Strauss 
12116971c6c8SGlenn Strauss 
12126971c6c8SGlenn Strauss static int
http_request_parse(request_st * const restrict r,const int scheme_port)12138fc8ab89SGlenn Strauss http_request_parse (request_st * const restrict r, const int scheme_port)
12146971c6c8SGlenn Strauss {
12158fc8ab89SGlenn Strauss     int status = http_request_parse_target(r, scheme_port);
12166f803af0SGlenn Strauss     if (0 != status) return status;
1217bcdc6a3bSJan Kneschke 
121863e32e81SGlenn Strauss     /* post-processing */
12198fc8ab89SGlenn Strauss     const unsigned int http_parseopts = r->conf.http_parseopts;
1220bcdc6a3bSJan Kneschke 
122144997909SJan Kneschke     /* check hostname field if it is set */
12223a9a3716SGlenn Strauss     /*(r->http_host might not be set until after parsing request headers)*/
12233a9a3716SGlenn Strauss     if (__builtin_expect( (r->http_host != NULL), 1)) {
12246870b0f5SGlenn Strauss         if (0 != http_request_host_policy(r->http_host,
1225af3313bfSGlenn Strauss                                           http_parseopts, scheme_port))
12266870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
12278c5acaaeSGlenn Strauss         buffer_copy_buffer(&r->uri.authority, r->http_host);
1228bcdc6a3bSJan Kneschke     }
1229fa4ab192SGlenn Strauss     else {
12303a9a3716SGlenn Strauss         buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1231f37c90bcSGlenn Strauss         if (r->http_version >= HTTP_VERSION_1_1)
12326870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1233fa4ab192SGlenn Strauss     }
1234bcdc6a3bSJan Kneschke 
1235e8a6ed6eSGlenn Strauss     if (HTTP_VERSION_1_1 != r->http_version
1236e8a6ed6eSGlenn Strauss         && (r->rqst_htags
1237e8a6ed6eSGlenn Strauss             & (light_bshift(HTTP_HEADER_UPGRADE)
1238e8a6ed6eSGlenn Strauss               |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1239e8a6ed6eSGlenn Strauss         return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1240e8a6ed6eSGlenn Strauss     }
1241e8a6ed6eSGlenn Strauss 
1242af5df352SGlenn Strauss     if (0 == r->reqbody_length) {
1243fa4ab192SGlenn Strauss         /* POST requires Content-Length (or Transfer-Encoding)
1244af5df352SGlenn Strauss          * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
12456870b0f5SGlenn Strauss         if (HTTP_METHOD_POST == r->http_method
12469c8981a7SGlenn Strauss             && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
12476870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1248fa4ab192SGlenn Strauss         }
1249fa4ab192SGlenn Strauss     }
1250fa4ab192SGlenn Strauss     else {
1251af5df352SGlenn Strauss         /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1252af5df352SGlenn Strauss         if (-1 == r->reqbody_length
12539c8981a7SGlenn Strauss             && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1254fa4ab192SGlenn Strauss             /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1255fa4ab192SGlenn Strauss              * 3.3.3.  Message Body Length
1256fa4ab192SGlenn Strauss              * [...]
1257fa4ab192SGlenn Strauss              * If a message is received with both a Transfer-Encoding and a
1258fa4ab192SGlenn Strauss              * Content-Length header field, the Transfer-Encoding overrides the
1259fa4ab192SGlenn Strauss              * Content-Length.  Such a message might indicate an attempt to
1260fa4ab192SGlenn Strauss              * perform request smuggling (Section 9.5) or response splitting
1261fa4ab192SGlenn Strauss              * (Section 9.4) and ought to be handled as an error.  A sender MUST
1262fa4ab192SGlenn Strauss              * remove the received Content-Length field prior to forwarding such
1263fa4ab192SGlenn Strauss              * a message downstream.
1264fa4ab192SGlenn Strauss              */
1265fa4ab192SGlenn Strauss             const unsigned int http_header_strict =
1266af3313bfSGlenn Strauss               (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1267fa4ab192SGlenn Strauss             if (http_header_strict) {
12686870b0f5SGlenn Strauss                 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1269fa4ab192SGlenn Strauss             }
1270fa4ab192SGlenn Strauss             else {
1271fa4ab192SGlenn Strauss                 /* ignore Content-Length */
12727c7f8c46SGlenn Strauss                 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1273fa4ab192SGlenn Strauss             }
1274fa4ab192SGlenn Strauss         }
12756870b0f5SGlenn Strauss         if (http_method_get_or_head(r->http_method)
1276af3313bfSGlenn Strauss             && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
12776870b0f5SGlenn Strauss             return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1278580823f7SJan Kneschke         }
1279bcdc6a3bSJan Kneschke     }
1280bcdc6a3bSJan Kneschke 
1281bcdc6a3bSJan Kneschke     return 0;
1282bcdc6a3bSJan Kneschke }
12836971c6c8SGlenn Strauss 
12846971c6c8SGlenn Strauss 
12858fc8ab89SGlenn Strauss static int
http_request_parse_hoff(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)12868fc8ab89SGlenn Strauss http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
12876971c6c8SGlenn Strauss {
12888fc8ab89SGlenn Strauss     /*
12898fc8ab89SGlenn Strauss      * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
12908fc8ab89SGlenn Strauss      * Header : "^([-a-zA-Z]+): (.+)$"
12918fc8ab89SGlenn Strauss      * End    : "^$"
12928fc8ab89SGlenn Strauss      */
12936971c6c8SGlenn Strauss 
12948fc8ab89SGlenn Strauss     int status;
12958fc8ab89SGlenn Strauss     const unsigned int http_parseopts = r->conf.http_parseopts;
12968fc8ab89SGlenn Strauss 
12978fc8ab89SGlenn Strauss     status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
12988fc8ab89SGlenn Strauss     if (0 != status) return status;
12998fc8ab89SGlenn Strauss 
13008fc8ab89SGlenn Strauss     status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
13018fc8ab89SGlenn Strauss     if (0 != status) return status;
13028fc8ab89SGlenn Strauss 
13038fc8ab89SGlenn Strauss     return http_request_parse(r, scheme_port);
13048fc8ab89SGlenn Strauss }
13058fc8ab89SGlenn Strauss 
13068fc8ab89SGlenn Strauss 
13078fc8ab89SGlenn Strauss static void
http_request_headers_fin(request_st * const restrict r)13088fc8ab89SGlenn Strauss http_request_headers_fin (request_st * const restrict r)
13098fc8ab89SGlenn Strauss {
13106971c6c8SGlenn Strauss     if (0 == r->http_status) {
13116971c6c8SGlenn Strauss       #if 0
13126971c6c8SGlenn Strauss         r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
13136971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_SCHEME)
13146971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_HOST)
13156971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_REMOTE_IP)
13166971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_REQUEST_METHOD)
13176971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_URL)
13186971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_QUERY_STRING)
13196971c6c8SGlenn Strauss                                 | (1 << COMP_HTTP_REQUEST_HEADER);
13206971c6c8SGlenn Strauss       #else
13216971c6c8SGlenn Strauss         /* all config conditions are valid after parsing header
13226971c6c8SGlenn Strauss          * (set all bits; remove dependency on plugin_config.h) */
13236971c6c8SGlenn Strauss         r->conditional_is_valid = ~0u;
13246971c6c8SGlenn Strauss       #endif
13256971c6c8SGlenn Strauss     }
13266971c6c8SGlenn Strauss     else {
13276971c6c8SGlenn Strauss         r->keep_alive = 0;
13286971c6c8SGlenn Strauss         r->reqbody_length = 0;
13298fc8ab89SGlenn Strauss     }
13308fc8ab89SGlenn Strauss }
13316971c6c8SGlenn Strauss 
13328fc8ab89SGlenn Strauss 
13338fc8ab89SGlenn Strauss void
http_request_headers_process(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)13348fc8ab89SGlenn Strauss http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
13358fc8ab89SGlenn Strauss {
13368fc8ab89SGlenn Strauss     r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
13378fc8ab89SGlenn Strauss 
13388fc8ab89SGlenn Strauss     http_request_headers_fin(r);
13398fc8ab89SGlenn Strauss 
13407a078f56SGlenn Strauss     if (__builtin_expect( (0 != r->http_status), 0)) {
13416971c6c8SGlenn Strauss         if (r->conf.log_request_header_on_error) {
13428fc8ab89SGlenn Strauss             /*(http_request_parse_headers() modifies hdrs only to
13436971c6c8SGlenn Strauss              * undo line-wrapping in-place using spaces)*/
13447a21b385SGlenn Strauss             log_error_multiline(r->conf.errh, __FILE__, __LINE__,
13457a21b385SGlenn Strauss                                 hdrs, r->rqst_header_len, "rqst: ");
13466971c6c8SGlenn Strauss         }
13476971c6c8SGlenn Strauss     }
13486971c6c8SGlenn Strauss }
13498fc8ab89SGlenn Strauss 
13508fc8ab89SGlenn Strauss 
13518fc8ab89SGlenn Strauss void
http_request_headers_process_h2(request_st * const restrict r,const int scheme_port)13528fc8ab89SGlenn Strauss http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
13538fc8ab89SGlenn Strauss {
13548fc8ab89SGlenn Strauss     if (0 == r->http_status)
13558fc8ab89SGlenn Strauss         r->http_status = http_request_parse(r, scheme_port);
13568fc8ab89SGlenn Strauss 
13578fc8ab89SGlenn Strauss     if (0 == r->http_status) {
13589c8981a7SGlenn Strauss         if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
13598fc8ab89SGlenn Strauss             r->http_status = http_request_header_line_invalid(r, 400,
13608fc8ab89SGlenn Strauss               "invalid Connection header with HTTP/2 -> 400");
13618fc8ab89SGlenn Strauss     }
13628fc8ab89SGlenn Strauss 
13638fc8ab89SGlenn Strauss     http_request_headers_fin(r);
13648fc8ab89SGlenn Strauss 
136535fa47d8SGlenn Strauss     /* limited; headers not collected into a single buf for HTTP/2 */
136635fa47d8SGlenn Strauss     if (__builtin_expect( (0 != r->http_status), 0)) {
13678fc8ab89SGlenn Strauss         if (r->conf.log_request_header_on_error) {
13688fc8ab89SGlenn Strauss             log_error(r->conf.errh, __FILE__, __LINE__,
136935fa47d8SGlenn Strauss               "request-header:\n:authority: %s\n:method: %s\n:path: %s",
137035fa47d8SGlenn Strauss               r->http_host ? r->http_host->ptr : "",
13719fe8fbaaSGlenn Strauss               http_method_buf(r->http_method)->ptr,
1372af3df29aSGlenn Strauss               !buffer_is_blank(&r->target) ? r->target.ptr : "");
13738fc8ab89SGlenn Strauss         }
13748fc8ab89SGlenn Strauss     }
13758fc8ab89SGlenn Strauss 
13768fc8ab89SGlenn Strauss     /* ignore Upgrade if using HTTP/2 */
13779c8981a7SGlenn Strauss     if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
13788fc8ab89SGlenn Strauss         http_header_request_unset(r, HTTP_HEADER_UPGRADE,
13798fc8ab89SGlenn Strauss                                   CONST_STR_LEN("upgrade"));
13808fc8ab89SGlenn Strauss     /* XXX: should filter out other hop-by-hop connection headers, too */
13818fc8ab89SGlenn Strauss }
1382