1 /*
2 * request - HTTP request processing
3 *
4 * Fully-rewritten from original
5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved
6 * License: BSD 3-clause (same as lighttpd)
7 */
8 #include "first.h"
9
10 #include "request.h"
11 #include "burl.h"
12 #include "http_header.h"
13 #include "http_kv.h"
14 #include "log.h"
15 #include "sock_addr.h"
16
17 #include <limits.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22
23 __attribute_cold__
24 __attribute_noinline__
25 void
http_request_state_append(buffer * const b,request_state_t state)26 http_request_state_append (buffer * const b, request_state_t state)
27 {
28 static const struct sn { const char *s; uint32_t n; } states[] = {
29 { CONST_STR_LEN("connect") }
30 ,{ CONST_STR_LEN("req-start") }
31 ,{ CONST_STR_LEN("read") }
32 ,{ CONST_STR_LEN("req-end") }
33 ,{ CONST_STR_LEN("readpost") }
34 ,{ CONST_STR_LEN("handle-req") }
35 ,{ CONST_STR_LEN("resp-start") }
36 ,{ CONST_STR_LEN("write") }
37 ,{ CONST_STR_LEN("resp-end") }
38 ,{ CONST_STR_LEN("error") }
39 ,{ CONST_STR_LEN("close") }
40 ,{ CONST_STR_LEN("(unknown)") }
41 };
42 const struct sn * const p =
43 states +((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
44 buffer_append_string_len(b, p->s, p->n);
45 }
46
47 __attribute_cold__
48 __attribute_noinline__
49 __attribute_pure__
50 const char *
http_request_state_short(request_state_t state)51 http_request_state_short (request_state_t state)
52 {
53 /*((char *) returned, but caller must use only one char)*/
54 static const char sstates[] = ".qrQRhsWSECx";
55 return
56 sstates+((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1);
57 }
58
59
__attribute_noinline__
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) {
    /* Scan the first len bytes of s for a byte disallowed in a URI under
     * strict checking: any byte <= 32 (controls and space), 127, or 255.
     * Returns a pointer to the first offending byte, or NULL if none. */
    const uint8_t * const end = s + len;
    for (const uint8_t *p = s; p != end; ++p) {
        const uint8_t c = *p;
        if (__builtin_expect( (c <= 32 || c == 127 || c == 255), 0))
            return (const char *)p;
    }
    return NULL;
}
71
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) {
    /* Scan the first len bytes of s for a control character: any byte < 32
     * other than horizontal tab, or 127.  Bytes >= 128 are accepted.
     * Returns a pointer to the first offending byte, or NULL if none. */
    for (uint_fast32_t i = 0; i < len; ++i) {
        const uint8_t c = (uint8_t)s[i];
        const int bad = (c < 32) ? (c != '\t') : (c == 127);
        if (__builtin_expect( (bad), 0))
            return s+i;
    }
    return NULL;
}
83
__attribute_nonnull__()
__attribute_pure__
static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) {
    /* Minimal line check: scan the first len bytes of s for NUL, CR or LF.
     * Returns a pointer to the first such byte, or NULL if none is found. */
    const char * const end = s + len;
    for (const char *p = s; p < end; ++p) {
        switch (*p) {
          case '\0':
          case '\r':
          case '\n':
            return p;
          default:
            break;
        }
    }
    return NULL;
}
94
static int request_check_hostname(buffer * const host) {
    /*
     * Strictly check host (with optional ":port") against the grammar below.
     * Returns 0 if acceptable, -1 otherwise.
     *
     * host may be modified in place:
     * - a single trailing '.' on a hostname is removed
     *   (any ":port" suffix is shifted left by one)
     * - a trailing ':' with no port digits is removed
     *
     *       hostport      = host [ ":" port ]
     *       host          = hostname | IPv4address | IPv6address
     *       hostname      = *( domainlabel "." ) toplabel [ "." ]
     *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
     *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
     *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
     *       IPv6address   = "[" ... "]"
     *       port          = *digit
     */

    const char *h = host->ptr;

    if (*h != '[') {
        uint32_t len = buffer_clen(host);
        const char * const colon = memchr(h, ':', len);
        uint32_t hlen = colon ? (uint32_t)(colon - h) : len;

        /* if hostname ends in ".", strip it */
        if (__builtin_expect( (0 == hlen), 0)) return -1;
        if (__builtin_expect( (h[hlen-1] == '.'), 0)) {
            /* shift port info one left */
            if (--hlen == 0) return -1;
            --len;
            if (NULL != colon)
                memmove(host->ptr+hlen, colon, len - hlen);
            buffer_truncate(host, len);
        }

        int label_len = 0;  /* chars seen in current label */
        int allnumeric = 1; /* all completed labels were digits-only */
        int numeric = 1;    /* current label is digits-only so far */
        int level = 0;      /* number of '.' separators seen */
        for (uint32_t i = 0; i < hlen; ++i) {
            const int ch = h[i];
            ++label_len;
            if (light_isdigit(ch))
                continue;
            else if ((light_isalpha(ch) || (ch == '-' && i != 0)))
                numeric = 0;
            else if (ch == '.' && 1 != label_len && '-' != h[i+1]) {
                /* label separator: reject empty label and label which
                 * begins with '-'; start tracking next label */
                allnumeric &= numeric;
                numeric = 1;
                label_len = 0;
                ++level;
            }
            else
                return -1;
        }
        /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */
        if (0 == label_len || (numeric && (level != 3 || !allnumeric)))
            return -1;

        h += hlen;
    }
    else {  /* IPv6 address */
        /* check the address inside [...]; note: not fully validating */
        /* (note: not allowing scoped literals, e.g. %eth0 suffix) */
        ++h; /* step past '[' */
        /* A valid IPv6 literal may contain up to 8 ':' when "::" stands for
         * a single zero group next to 7 explicit groups,
         * e.g. "[1:2:3:4:5:6:7::]" or "[::2:3:4:5:6:7:8]" (RFC 4291 2.2),
         * so accept up to 8 ':' and stop scanning at the 9th */
        int cnt = 0;
        while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt <= 8)) ++h;
        /*(invalid char, too many ':', missing ']', or empty "[]")*/
        if (*h != ']' || h - host->ptr == 1) return -1;
        ++h; /* step past ']' */
    }

    /* check numerical port, if present */
    if (*h == ':') {
        if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/
            buffer_truncate(host, h - host->ptr);
        do { ++h; } while (light_isdigit(*h));
    }

    /* anything left over after host and optional port is invalid */
    return (*h == '\0') ? 0 : -1;
}
171
int http_request_host_normalize(buffer * const b, const int scheme_port) {
    /*
     * Normalize host string in b in place.
     * Returns 0 on success, -1 if the host (or port) is rejected.
     * A port, if present and non-zero, is re-appended to b in decimal
     * only when it differs from scheme_port.
     *
     * check for and canonicalize numeric IP address and portnum (optional)
     * (IP address may be followed by ":portnum" (optional))
     * - IPv6: "[...]"
     * - IPv4: "x.x.x.x"
     * - IPv4: 12345678   (32-bit decimal number)
     * - IPv4: 012345678  (32-bit octal number)
     * - IPv4: 0x12345678 (32-bit hex number)
     *
     * allow any chars (except ':' and '\0' and stray '[' or ']')
     *   (other code may check chars more strictly or more pedantically)
     * ':'  delimits (optional) port at end of string
     * "[]" wraps IPv6 address literal
     * '\0' should have been rejected earlier were it present
     *
     * any chars includes, but is not limited to:
     * - allow '-' any where, even at beginning of word
     *     (security caution: might be confused for cmd flag if passed to shell)
     * - allow all-digit TLDs
     *     (might be mistaken for IPv4 addr by inet_aton()
     *      unless non-digits appear in subdomain)
     */

    /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
     * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
     * Not using inet_pton() (when available) on IPv4 for similar reasons. */

    const char * const p = b->ptr;
    const size_t blen = buffer_clen(b);
    long port = 0;

    if (*p != '[') {
        /* hostname or IPv4 literal, optionally followed by ":port" */
        char * const colon = (char *)memchr(p, ':', blen);
        if (colon) {
            if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
            if (colon[1] != '\0') {
                char *e;
                port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
                if (0 < port && port <= USHRT_MAX && *e == '\0') {
                    /* valid port */
                } else {
                    return -1;
                }
            } /*(else ignore stray colon at string end)*/
            buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/
        }

        if (light_isdigit(*p)) do {
            /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
            /* (check one-element cache of normalized IPv4 address string) */
            static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
            size_t n = colon ? (size_t)(colon - p) : blen;
            sock_addr addr;
            /* cache hit: host already equals last normalized IPv4 string */
            if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
            /* (if not parsed as IPv4 address, host is left as-is) */
            if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
                sock_addr_inet_ntop_copy_buffer(b, &addr);
                n = buffer_clen(b);
                if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
            }
        } while (0);
    } else do { /* IPv6 addr */
      #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)

        /* (check one-element cache of normalized IPv6 address string) */
        static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
        sock_addr addr;
        char *bracket = b->ptr+blen-1;
        char *percent = strchr(b->ptr+1, '%');
        size_t len;
        int rc;
        char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
        if (blen <= 2) return -1; /*(invalid "[]")*/
        if (*bracket != ']') {
            /* string does not end in ']'; expect "]:port" (or "]:") */
            bracket = (char *)memchr(b->ptr+1, ']', blen-1);
            if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){
                return -1;
            }
            if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
                char *e;
                port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
                if (0 < port && port <= USHRT_MAX && *e == '\0') {
                    /* valid port */
                } else {
                    return -1;
                }
            }
        }

        /* len: length of address text between '[' and '%' (or ']') */
        len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
        if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
            /* truncate after ']' and re-add normalized port, if needed */
            buffer_truncate(b, (size_t)(bracket - b->ptr + 1));
            break;
        }

        /* temporarily NUL-terminate the address text in place for parsing */
        *bracket = '\0';/*(terminate IPv6 string)*/
        if (percent) *percent = '\0'; /*(remove %interface from address)*/
        rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
        if (percent) *percent = '%'; /*(restore %interface)*/
        *bracket = ']'; /*(restore bracket)*/
        if (1 != rc) return -1;

        sock_addr_inet_ntop(&addr, buf, sizeof(buf));
        len = strlen(buf);
        if (percent) {
            if (percent > bracket) return -1;
            if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
            /* NOTE(review): the one-element cache is refreshed only here,
             * inside the (percent) branch -- confirm that is intended */
            if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
            /* re-attach "%interface" after the normalized address */
            memcpy(buf+len, percent, (size_t)(bracket - percent));
            len += (size_t)(bracket - percent);
        }
        buffer_truncate(b, 1); /* truncate after '[' */
        buffer_append_str2(b, buf, len, CONST_STR_LEN("]"));

      #else

        /* IPv6 literal cannot be parsed in this build configuration */
        return -1;

      #endif
    } while (0);

    /* re-add port in decimal only if present and not the scheme default */
    if (0 != port && port != scheme_port) {
        buffer_append_char(b, ':');
        buffer_append_int(b, (int)port);
    }

    return 0;
}
301
int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) {
    /* Apply host checks selected by http_parseopts to b.
     * Returns 0 if b passes, 1 if b is rejected.
     *
     * caller should lowercase, as is done in http_request_header_set_Host(),
     * for consistency in case the value is used prior to calling policy func */
    /*buffer_to_lower(b);*/

    /* validity check: strict hostname grammar, or else minimal line check */
    if (http_parseopts & HTTP_PARSEOPT_HOST_STRICT) {
        if (0 != request_check_hostname(b))
            return 1;
    }
    else if (NULL != http_request_check_line_minimal(BUF_PTR_LEN(b)))
        return 1;

    /* optional normalization (performed only if validity check passed) */
    if (http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
        return (0 != http_request_host_normalize(b, scheme_port));

    return 0;
}
312
313 __attribute_cold__
314 __attribute_noinline__
http_request_header_line_invalid(request_st * const restrict r,const int status,const char * const restrict msg)315 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) {
316 if (r->conf.log_request_header_on_error) {
317 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg);
318 }
319 return status;
320 }
321
322 __attribute_cold__
323 __attribute_noinline__
http_request_header_char_invalid(request_st * const restrict r,const char ch,const char * const restrict msg)324 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) {
325 if (r->conf.log_request_header_on_error) {
326 if ((unsigned char)ch > 32 && ch != 127) {
327 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch);
328 }
329 else {
330 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch);
331 }
332 }
333 return 400;
334 }
335
336
337 __attribute_noinline__
http_request_header_set_Host(request_st * const restrict r,const char * const h,size_t hlen)338 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen)
339 {
340 r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST,
341 CONST_STR_LEN("Host"));
342 buffer_copy_string_len_lc(r->http_host, h, hlen);
343 }
344
345
int64_t
li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err)
{
    /* Base 10 parse of at most vlen chars of v, accepting only digits 0-9.
     * Parsing stops at the first non-digit or at the digit which would push
     * the value beyond INT64_MAX; *err is set to the stop position.
     * (negative numbers are rejected, since '-' is not a digit)
     * note: errno is not set; detect error if *err != v+vlen upon return
     * (caller must check 0 == vlen if that is to be an error for caller) */
    const uint8_t *p = (const uint8_t *)v;
    const uint8_t * const end = p + vlen;
    int64_t acc = 0;
    for (; p < end; ++p) {
        /*(unsigned subtraction; chars below '0' wrap around to > 9)*/
        const uint8_t digit = (uint8_t)(*p - '0');
        if (digit > 9 || acc > INT64_MAX/10)
            break;
        acc *= 10;
        if (acc > INT64_MAX - digit)
            break;
        acc += digit;
    }
    *err = (const char *)p;
    return acc;
}
366
367
368 __attribute_cold__
http_request_parse_duplicate(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)369 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
370 /* Proxies sometimes send dup headers
371 * if they are the same we ignore the second
372 * if not, we raise an error */
373 const buffer * const vb = http_header_request_get(r, id, k, klen);
374 if (vb && buffer_eq_icase_slen(vb, v, vlen))
375 return 0; /* ignore header; matches existing header */
376
377 const char *errmsg;
378 switch (id) {
379 case HTTP_HEADER_HOST:
380 errmsg = "duplicate Host header -> 400";
381 break;
382 case HTTP_HEADER_CONTENT_TYPE:
383 errmsg = "duplicate Content-Type header -> 400";
384 break;
385 case HTTP_HEADER_IF_MODIFIED_SINCE:
386 errmsg = "duplicate If-Modified-Since header -> 400";
387 break;
388 case HTTP_HEADER_HTTP2_SETTINGS:
389 errmsg = "duplicate HTTP2-Settings header -> 400";
390 break;
391 default:
392 errmsg = "duplicate header -> 400";
393 break;
394 case HTTP_HEADER_IF_NONE_MATCH:
395 /* if dup, only the first one will survive */
396 return 0; /* ignore header */
397 }
398 return http_request_header_line_invalid(r, 400, errmsg);
399 }
400
401
402 /* add header to list of headers
403 * certain headers are also parsed
404 * might drop a header if deemed unnecessary/broken
405 *
406 * returns 0 on success, HTTP status on error
407 */
http_request_parse_single_header(request_st * const restrict r,const enum http_header_e id,const char * const restrict k,const size_t klen,const char * const restrict v,const size_t vlen)408 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) {
409 /*
410 * Note: k might not be '\0'-terminated
411 * Note: v is not '\0'-terminated
412 * With lighttpd HTTP/1.1 parser, v ends with whitespace
413 * (one of '\r' '\n' ' ' '\t')
414 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen
415 * (care must be taken to avoid libc funcs which expect z-strings)
416 */
417 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/
418
419 switch (id) {
420 /*case HTTP_HEADER_OTHER:*/
421 default:
422 break;
423 case HTTP_HEADER_HOST:
424 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) {
425 if (vlen >= 1024) { /*(expecting < 256)*/
426 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400");
427 }
428 /*(http_request_header_append() plus sets r->http_host)*/
429 http_request_header_set_Host(r, v, vlen);
430 return 0;
431 }
432 else if (NULL != r->http_host
433 && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) {
434 /* ignore all Host: headers if match authority in request line */
435 /* (expect Host to match case in :authority of HTTP/2 request) */
436 return 0; /* ignore header */
437 }
438 /* else parse duplicate for match or error */
439 __attribute_fallthrough__
440 case HTTP_HEADER_IF_MODIFIED_SINCE:
441 case HTTP_HEADER_IF_NONE_MATCH:
442 case HTTP_HEADER_CONTENT_TYPE:
443 case HTTP_HEADER_HTTP2_SETTINGS:
444 if (light_btst(r->rqst_htags, id))
445 return http_request_parse_duplicate(r, id, k, klen, v, vlen);
446 break;
447 case HTTP_HEADER_CONNECTION:
448 /* "Connection: close" is common case if header is present */
449 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close")))
450 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) {
451 r->keep_alive = 0;
452 break;
453 }
454 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){
455 r->keep_alive = 1;
456 break;
457 }
458 break;
459 case HTTP_HEADER_CONTENT_LENGTH:
460 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
461 /*(trailing whitespace was removed from vlen)*/
462 /*(not using strtoll() since v might not be z-string)*/
463 const char *err;
464 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err);
465 if (err == v+vlen) {
466 /* (set only if not set to -1 by Transfer-Encoding: chunked) */
467 if (0 == r->reqbody_length) r->reqbody_length = clen;
468 }
469 else {
470 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400");
471 }
472 }
473 else {
474 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400");
475 }
476 break;
477 case HTTP_HEADER_TRANSFER_ENCODING:
478 if (HTTP_VERSION_1_1 != r->http_version) {
479 return http_request_header_line_invalid(r, 400,
480 HTTP_VERSION_1_0 == r->http_version
481 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400"
482 : "HTTP/2 with Transfer-Encoding is invalid -> 400");
483 }
484
485 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) {
486 /* Transfer-Encoding might contain additional encodings,
487 * which are not currently supported by lighttpd */
488 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */
489 }
490 r->reqbody_length = -1;
491
492 /* Transfer-Encoding is a hop-by-hop header,
493 * which must not be blindly forwarded to backends */
494 return 0; /* skip header */
495 }
496
497 http_header_request_append(r, id, k, klen, v, vlen);
498 return 0;
499 }
500
501 __attribute_cold__
http_request_parse_proto_loose(request_st * const restrict r,const char * const restrict ptr,const size_t len,const unsigned int http_parseopts)502 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) {
503 const char * proto = memchr(ptr, ' ', len);
504 if (NULL == proto)
505 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
506 proto = memchr(proto+1, ' ', len - (proto+1 - ptr));
507 if (NULL == proto)
508 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
509 ++proto;
510
511 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
512 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
513 /* length already checked before calling this routine */
514 /* (len != (size_t)(proto - ptr + 8)) */
515 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/
516 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
517 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
518 }
519 else
520 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505");
521 }
522 else
523 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400");
524
525 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */
526 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version);
527
528 return 0;
529 }
530
531 __attribute_cold__
http_request_parse_reqline_uri(request_st * const restrict r,const char * const restrict uri,const size_t len,const unsigned int http_parseopts)532 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) {
533 const char *nuri;
534 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7)
535 && NULL != (nuri = memchr(uri + 7, '/', len-7)))
536 ||
537 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8)
538 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) {
539 const char * const host = uri + (uri[4] == ':' ? 7 : 8);
540 const size_t hostlen = nuri - host;
541 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/
542 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400");
543 return NULL;
544 }
545 /* Insert as "Host" header */
546 http_request_header_set_Host(r, host, hostlen);
547 return nuri;
548 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/
549 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0])))
550 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) {
551 /* (permitted) */
552 return uri;
553 } else {
554 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400");
555 return NULL;
556 }
557 }
558
559
/* forward declaration; called from http_request_parse_header() when a header
 * field name contains a char which is neither accepted by light_islower()
 * nor '-' (and is not uppercase); k and klen passed there begin at that char
 * and a non-zero return is treated there as 400 Bad Request */
__attribute_cold__
__attribute_noinline__
static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict);
563
564
565 int
http_request_validate_pseudohdrs(request_st * const restrict r,const int scheme,const unsigned int http_parseopts)566 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts)
567 {
568 /* :method is required to indicate method
569 * CONNECT method must have :method and :authority
570 * unless RFC8441 CONNECT extension, which must follow 'other' (below)
571 * All other methods must have at least :method :scheme :path */
572
573 if (HTTP_METHOD_UNSET == r->http_method)
574 return http_request_header_line_invalid(r, 400,
575 "missing pseudo-header method -> 400");
576
577 if (HTTP_METHOD_CONNECT != r->http_method)
578 r->h2_connect_ext = 0;
579
580 if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)
581 || __builtin_expect( (r->h2_connect_ext != 0), 0)) {
582
583 if (!scheme)
584 return http_request_header_line_invalid(r, 400,
585 "missing pseudo-header scheme -> 400");
586
587 if (buffer_is_blank(&r->target))
588 return http_request_header_line_invalid(r, 400,
589 "missing pseudo-header path -> 400");
590
591 const char * const uri = r->target.ptr;
592 if (*uri != '/') { /* (common case: (*uri == '/')) */
593 if (uri[0] != '*' || uri[1] != '\0'
594 || HTTP_METHOD_OPTIONS != r->http_method)
595 return http_request_header_line_invalid(r, 400,
596 "invalid pseudo-header path -> 400");
597 }
598 }
599 else { /* HTTP_METHOD_CONNECT */
600 if (NULL == r->http_host)
601 return http_request_header_line_invalid(r, 400,
602 "missing pseudo-header authority -> 400");
603 if (!buffer_is_blank(&r->target) || scheme)
604 return http_request_header_line_invalid(r, 400,
605 "invalid pseudo-header with CONNECT -> 400");
606 /* note: this copy occurs prior to http_request_host_policy()
607 * so any consumer handling CONNECT should normalize r->target
608 * as appropriate */
609 buffer_copy_buffer(&r->target, r->http_host);
610 }
611 buffer_copy_buffer(&r->target_orig, &r->target);
612
613 /* r->http_host, if set, is checked with http_request_host_policy()
614 * in http_request_parse() */
615
616 /* copied and modified from end of http_request_parse_reqline() */
617
618 /* check uri for invalid characters */
619 const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/
620 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
621 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
622 ? NULL /* URI will be checked in http_request_parse_target() */
623 : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len)
624 : http_request_check_line_minimal(r->target.ptr, len);
625 return (NULL == x)
626 ? 0
627 : http_request_header_char_invalid(r, *x,
628 "invalid character in URI -> 400");
629 }
630
631
/* Process one decoded header field (key/value in hpctx) of a request whose
 * fields arrive one at a time via http_header_parse_ctx (HTTP/2 pseudo-header
 * handling, using ls-hpack in h2.c per note below).
 * - accounts field size against hpctx->max_request_field_size
 * - pseudo-headers (key begins with ':') set method, target, Host, etc in r
 * - first non-pseudo header triggers http_request_validate_pseudohdrs()
 * - non-pseudo headers are checked, trimmed of surrounding blanks and tabs,
 *   and passed to http_request_parse_single_header()
 * - trailers are checked for pseudo-headers and otherwise ignored
 * returns 0 on success (including field ignored), HTTP status on error
 */
int
http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx)
{
    /* Note: k and v might not be '\0' terminated strings;
     * care must be taken to avoid libc funcs which expect z-strings */
    const char * const restrict k = hpctx->k;
    const char * restrict v = hpctx->v;
    const uint32_t klen = hpctx->klen;
    uint32_t vlen = hpctx->vlen;

    if (0 == klen)
        return http_request_header_line_invalid(r, 400,
          "invalid header key -> 400");

    /* accumulate total size of header fields seen so far (+4 per field) */
    if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) {
        /*(configurable with server.max-request-field-size; default 8k)*/
      #if 1 /* emit to error log for people sending large headers */
        log_error(r->conf.errh, __FILE__, __LINE__,
                  "oversized request header -> 431");
        return 431; /* Request Header Fields Too Large */
      #else
        /* 431 Request Header Fields Too Large */
        return http_request_header_line_invalid(r, 431,
          "oversized request header -> 431");
      #endif
    }

    if (!hpctx->trailers) {
        if (*k == ':') {
            /* HTTP/2 request pseudo-header fields */
            if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/
                return http_request_header_line_invalid(r, 400,
                  "invalid pseudo-header -> 400");
            if (0 == vlen)
                return http_request_header_line_invalid(r, 400,
                  "invalid header value -> 400");

            /* (note: relies on implementation details using ls-hpack in h2.c)
             * (hpctx->id mapped from lsxpack_header_t hpack_index, which only
             *  matches key, not also value, if lsxpack_header_t flags does not
             *  have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET
             *  below indicates any method, not only "GET") */
            if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
                /* id not provided; identify pseudo-header by name
                 * (switch on name length excluding leading ':') */
                switch (klen-1) {
                  case 4:
                    if (0 == memcmp(k+1, "path", 4))
                        hpctx->id = HTTP_HEADER_H2_PATH;
                    break;
                  case 6:
                    if (0 == memcmp(k+1, "method", 6))
                        hpctx->id = HTTP_HEADER_H2_METHOD_GET;
                    else if (0 == memcmp(k+1, "scheme", 6))
                        hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP;
                    break;
                  case 8:
                    if (0 == memcmp(k+1, "protocol", 8))
                        hpctx->id = HTTP_HEADER_H2_PROTOCOL;
                    break;
                  case 9:
                    if (0 == memcmp(k+1, "authority", 9))
                        hpctx->id = HTTP_HEADER_H2_AUTHORITY;
                    break;
                  default:
                    break;
                }
                /* name did not match any recognized pseudo-header */
                if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN)
                    return http_request_header_line_invalid(r, 400,
                      "invalid pseudo-header -> 400");
            }

            /* each case below returns upon success; 'break' indicates the
             * pseudo-header was already seen and falls out to the
             * "repeated pseudo-header" error after the switch */
            switch (hpctx->id) {
              case HTTP_HEADER_H2_AUTHORITY:
                if (__builtin_expect( (r->http_host != NULL), 0))
                    break;
                if (vlen >= 1024) /*(expecting < 256)*/
                    return http_request_header_line_invalid(r, 400,
                      "invalid pseudo-header authority too long -> 400");
                /* insert as "Host" header */
                http_request_header_set_Host(r, v, vlen);
                return 0;
              case HTTP_HEADER_H2_METHOD_GET:  /*(any method, not only "GET")*/
              case HTTP_HEADER_H2_METHOD_POST:
                if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0))
                    break;
                r->http_method = get_http_method_key(v, vlen);
                if (HTTP_METHOD_UNSET >= r->http_method)
                    return http_request_header_line_invalid(r, 501,
                      "unknown http-method -> 501");
                return 0;
              case HTTP_HEADER_H2_PATH:            /*(any path, not only "/")*/
              case HTTP_HEADER_H2_PATH_INDEX_HTML:
                if (__builtin_expect( (!buffer_is_blank(&r->target)), 0))
                    break;
                buffer_copy_string_len(&r->target, v, vlen);
                return 0;
              case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/
              case HTTP_HEADER_H2_SCHEME_HTTPS:
                if (__builtin_expect( (hpctx->scheme), 0))
                    break;
                hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/
                return 0;
               #if 0
                switch (vlen) {/*(validated, but then ignored)*/
                  case 5: /* "https" */
                    if (v[4]!='s') break;
                    __attribute_fallthrough__
                  case 4: /* "http" */
                    if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') {
                        hpctx->scheme = 1;
                        return 0;
                    }
                    break;
                  default:
                    break;
                }
                return http_request_header_line_invalid(r, 400,
                  "unknown pseudo-header scheme -> 400");
               #endif
              case HTTP_HEADER_H2_PROTOCOL:
                /* support only ":protocol: websocket" for now */
                if (vlen != 9 || 0 != memcmp(v, "websocket", 9))
                    return http_request_header_line_invalid(r, 405,
                      "unhandled :protocol value -> 405");
                /*(future: might be enum of recognized :protocol: ext values)*/
                r->h2_connect_ext = 1;
                return 0;
              default:
                return http_request_header_line_invalid(r, 400,
                  "invalid pseudo-header -> 400");
            }
            return http_request_header_line_invalid(r, 400,
              "repeated pseudo-header -> 400");
        }
        else { /*(non-pseudo headers)*/
            if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/
                hpctx->pseudo = 0;
                int status =
                  http_request_validate_pseudohdrs(r, hpctx->scheme,
                                                   hpctx->http_parseopts);
                if (0 != status) return status;
            }
            /* header with empty value is ignored */
            if (0 == vlen)
                return 0;

            const unsigned int http_header_strict =
              (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);

            const char * const x = (http_header_strict)
              ? http_request_check_line_strict(v, vlen)
              : http_request_check_line_minimal(v, vlen);
            if (x)
                return http_request_header_char_invalid(r, *x,
                  "invalid character in header -> 400");

            /* remove leading and trailing whitespace (strict RFC conformance)*/
            if (__builtin_expect( (*v <= 0x20), 0)) {
                /*(loop ends at first non-blank char or when vlen reaches 0)*/
                while ((*v == ' ' || *v == '\t') && (++v, --vlen)) ;
                if (0 == vlen)
                    return 0;
            }
            if (__builtin_expect( (v[vlen-1] <= 0x20), 0)) {
                /*(v[0] is not blank here, so loop stops before vlen is 0)*/
                while (v[vlen-1] == ' ' || v[vlen-1] == '\t') --vlen;
            }

            if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) {
                /* id not provided; check chars in field name:
                 * skip over chars accepted by light_islower() and '-' */
                uint32_t j = 0;
                while (j < klen && (light_islower(k[j]) || k[j] == '-'))
                    ++j;

                if (__builtin_expect( (j != klen), 0)) {
                    /* uppercase char in field name is rejected;
                     * any other char is checked by separate routine */
                    if (light_isupper(k[j]))
                        return 400;
                    if (0 != http_request_parse_header_other(r, k+j, klen-j,
                                                             http_header_strict))
                        return 400;
                }

                hpctx->id = http_header_hkey_get_lc(k, klen);
            }

            const enum http_header_e id = (enum http_header_e)hpctx->id;

            /* TE header is accepted here only with value "trailers" */
            if (__builtin_expect( (id == HTTP_HEADER_TE), 0)
                && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers")))
                return http_request_header_line_invalid(r, 400,
                  "invalid TE header value with HTTP/2 -> 400");

            return http_request_parse_single_header(r, id, k, klen, v, vlen);
        }
    }
    else { /*(trailers)*/
        if (*k == ':')
            return http_request_header_line_invalid(r, 400,
              "invalid pseudo-header in trailers -> 400");
        /* ignore trailers (after required HPACK decoding) if streaming
         * request body to backend since headers have already been sent
         * to backend via Common Gateway Interface (CGI) (CGI, FastCGI,
         * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently
         * support using HTTP/2 to connect to backends) */
      #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/
        if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
            return 0;
      #endif
        /* Note: do not unconditionally merge into headers since if
         * headers had already been sent to backend, then mod_accesslog
         * logging of request headers might be inaccurate.
         * Many simple backends do not support HTTP/1.1 requests sending
         * Transfer-Encoding: chunked, and even those that do might not
         * handle trailers.  Some backends do not even support HTTP/1.1.
         * For all these reasons, ignore trailers if streaming request
         * body to backend.  Revisit in future if adding support for
         * connecting to backends using HTTP/2 (with explicit config
         * option to force connecting to backends using HTTP/2) */

        /* XXX: TODO: request trailers not handled if streaming reqbody
         * XXX: must ensure that trailers are not disallowed field-names
         */

      #if 0
        if (0 == vlen)
            return 0;
      #endif

        /* (trailer field is currently ignored) */
        return 0;
    }
}
858
859
/* Parse the HTTP/1.x request line "<method> <uri> <protocol>\r\n" at ptr.
 *
 * r              request being filled in
 * ptr            start of the request header block (request line first)
 * hoff           header offsets table; hoff[2] is used as the length of the
 *                request line (including its line terminator) and
 *                hoff[hoff[0]] as the length scanned for '\0' when
 *                HTTP_PARSEOPT_HEADER_STRICT is not set
 * http_parseopts HTTP_PARSEOPT_* flags
 *
 * On success returns 0 after setting r->http_version, r->keep_alive and
 * r->http_method and copying the request target into both r->target and
 * r->target_orig.
 * On failure returns the (non-zero) result of the error helper, which is
 * invoked here with status 400 or 501, or the status reported by
 * http_request_parse_proto_loose(). */
static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    size_t len = hoff[2];

    /* parse the first line of the request
     *   <method> <uri> <protocol>\r\n
     * */
    if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */
        return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400");
    /* strip line terminator: "\r\n", or bare "\n" only if not strict */
    if (ptr[len-2] == '\r')
        len-=2;
    else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/
        len-=1;
    else
        return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");

    /*
     * RFC7230:
     *   HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
     *   HTTP-name     = %x48.54.54.50 ; "HTTP", case-sensitive
     */

    /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */
    /* (8 protocol chars are compared as a single 64-bit value via union) */
    union proto_un {
      char c[8];
      uint64_t u;
    };
    static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}};
    static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}};
    const char *p = ptr + len - 8;
    union proto_un proto8;
    proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3];
    proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7];
    if (p[-1] == ' ' && http_1_1.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_1;
        r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */
    }
    else if (p[-1] == ' ' && http_1_0.u == proto8.u) {
        r->http_version = HTTP_VERSION_1_0;
        r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */
    }
    else {
        int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts);
        if (0 != status) return status;
        /*(space char must exist if http_request_parse_proto_loose() succeeds)*/
        /* walk back so that p points at start of protocol (p[-1] == ' ') */
        for (p = ptr + len - 9; p[-1] != ' '; --p) ;
    }

    /* method is expected to be a short string in the general case */
    size_t i = 0;
    while (ptr[i] != ' ') ++i;
  #if 0 /*(space must exist if protocol was parsed successfully)*/
    while (i < len && ptr[i] != ' ') ++i;
    if (ptr[i] != ' ')
        return http_request_header_line_invalid(r, 400, "incomplete request line -> 400");
  #endif

    r->http_method = get_http_method_key(ptr, i);
    if (HTTP_METHOD_UNSET >= r->http_method)
        return http_request_header_line_invalid(r, 501, "unknown http-method -> 501");

    const char *uri = ptr + i + 1;

    /* (space following method is the same space preceding the protocol) */
    if (uri == p)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");
    len = (size_t)(p - uri - 1); /* -1 excludes the space before protocol */

    if (*uri != '/') { /* (common case: (*uri == '/')) */
        uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts);
        if (NULL == uri) return 400;
        len = (size_t)(p - uri - 1);
    }

    if (0 == len)
        return http_request_header_line_invalid(r, 400, "no uri specified -> 400");

    /* check uri for invalid characters */      /* http_header_strict */
    const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)
      ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)
          ? NULL /* URI will be checked in http_request_parse_target() */
          : http_request_check_uri_strict((const uint8_t *)uri, len)
      : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */
    /* reject the request; the helper's result must be propagated, otherwise
     * the error is only logged and parsing continues with the invalid URI */
    if (x)
        return http_request_header_char_invalid(r, *x, "invalid character in URI -> 400");

    buffer_copy_string_len(&r->target, uri, len);
    buffer_copy_string_len(&r->target_orig, uri, len);
    return 0;
}
948
/* Split r->target into r->uri.path and r->uri.query and set r->uri.scheme.
 *
 * The target is parsed at the start of a request and may be parsed again
 * upon HANDLER_COMEBACK.  r->target is expected to have the form
 * "/url-part?query-part" (and *not* a fully-qualified URI such as
 * https://...); r->uri.authority is expected to be derived elsewhere from
 * r->http_host.
 *
 * Component names follow RFC 2396:
 *   (scheme)://(authority)(path)?(query)#fragment
 *
 * Returns 0 on success, else the result of
 * http_request_header_line_invalid() (invoked with status 400). */
int http_request_parse_target(request_st * const r, int scheme_port) {
    /* initial scheme comes from connection-level state: "https" when
     * scheme_port is 443, else the 4-char prefix "http"
     * (r->uri.scheme may be overwritten later,
     *  for example by mod_extforward or mod_magnet) */
    buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4);

    buffer * const target = &r->target;

    /* CONNECT ... (or) OPTIONS * ... : target is used verbatim as path */
    int verbatim =
      (r->http_method == HTTP_METHOD_CONNECT && !r->h2_connect_ext);
    if (!verbatim && r->http_method == HTTP_METHOD_OPTIONS)
        verbatim = (target->ptr[0] == '*' && target->ptr[1] == '\0');
    if (verbatim) {
        buffer_copy_buffer(&r->uri.path, target);
        buffer_clear(&r->uri.query);
        return 0;
    }

    /* locate '?' which begins query-part (NULL if there is no query-part) */
    char *qmark;
    if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) {
        /*uint32_t len = buffer_clen(target);*/
        const int qs =
          burl_normalize(target, r->tmp_buf, r->conf.http_parseopts);
        if (-2 == qs)
            return http_request_header_line_invalid(r, 400,
              "invalid character in URI -> 400"); /* Bad Request */
        qmark = (-1 != qs) ? target->ptr+qs : NULL;
      #if 0  /* future: might enable here, or below for all requests */
        /* (Note: total header size not recalculated on HANDLER_COMEBACK
         *  even if other request headers changed during processing)
         * (If (0 != r->loops_per_request), then the generated
         *  request is too large.  Should a different error be returned?) */
        r->rqst_header_len -= len;
        len = buffer_clen(target);
        r->rqst_header_len += len;
        if (len > MAX_HTTP_REQUEST_URI) {
            return 414; /* 414 URI Too Long */
        }
        if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) {
            log_error(r->conf.errh, __FILE__, __LINE__,
              "request header fields too large: %u -> 431",
              r->rqst_header_len);
            return 431; /* Request Header Fields Too Large */
        }
      #endif
    }
    else {
        size_t scan_len = buffer_clen(target);
        char * const frag = memchr(target->ptr, '#', scan_len);
        if (frag) { /* discard fragment */
            scan_len = (size_t)(frag - target->ptr);
            buffer_truncate(target, scan_len);
        }
        qmark = memchr(target->ptr, '?', scan_len);
    }

    /* extract query string from target; the remainder is the path */
    const char * const tstr = target->ptr;
    const uint32_t tlen = buffer_clen(target);
    uint32_t path_len = tlen;
    if (NULL == qmark)
        buffer_clear(&r->uri.query);
    else {
        path_len = (uint32_t)(qmark - tstr);
        buffer_copy_string_len(&r->uri.query, qmark + 1, tlen - path_len - 1);
    }
    buffer_copy_string_len(&r->uri.path, tstr, path_len);

    /* decode url to path
     *
     * - decode url-encodings  (e.g. %20 -> ' ')
     * - remove path-modifiers (e.g. /../)
     */
    buffer_urldecode_path(&r->uri.path);
    buffer_path_simplify(&r->uri.path);

    if (r->uri.path.ptr[0] == '/')
        return 0;

    return http_request_header_line_invalid(r, 400,
      "uri-path does not begin with '/' -> 400"); /* Bad Request */
}
1058
1059 __attribute_cold__
1060 __attribute_noinline__
http_request_parse_header_other(request_st * const restrict r,const char * const restrict k,const int klen,const unsigned int http_header_strict)1061 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) {
1062 for (int i = 0; i < klen; ++i) {
1063 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/
1064 /**
1065 * 1*<any CHAR except CTLs or separators>
1066 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
1067 *
1068 */
1069 switch(k[i]) {
1070 case ' ':
1071 case '\t':
1072 return http_request_header_line_invalid(r, 400, "WS character in key -> 400");
1073 case '\r':
1074 case '\n':
1075 case '(':
1076 case ')':
1077 case '<':
1078 case '>':
1079 case '@':
1080 case ',':
1081 case ':':
1082 case ';':
1083 case '\\':
1084 case '\"':
1085 case '/':
1086 case '[':
1087 case ']':
1088 case '?':
1089 case '=':
1090 case '{':
1091 case '}':
1092 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1093 default:
1094 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0')
1095 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400");
1096 break; /* ok */
1097 }
1098 }
1099 return 0;
1100 }
1101
/* Parse the HTTP/1.x header lines which follow the request line.
 *
 * ptr is the header block and hoff the table of line offsets into it;
 * lines hoff[2] .. hoff[hoff[0]-1] are visited.  Folded (continuation)
 * lines are joined to the preceding line in-place by overwriting the line
 * terminator with spaces, which is why ptr is not const.
 * Each non-empty field is handed to http_request_parse_single_header().
 *
 * Returns 0 on success, else a non-zero status from the first failure. */
static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) {
    const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);

  #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/
    int i = hoff[2];

    if (ptr[i] == ' ' || ptr[i] == '\t') {
        return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400");
    }
  #endif

    for (int i = 2; i < hoff[0]; ++i) {
        const char * const k = ptr + hoff[i];
        /* one past last line hoff[hoff[0]] is to final "\r\n" */
        char *eol = ptr + hoff[i+1];

        const char *sep = memchr(k, ':', eol - k);
        if (NULL == sep)
            return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400");

        /* value begins right after the ':' (leading WS skipped below) */
        const char *v = sep + 1;

        /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
         * 3.2.4.  Field Parsing
         * [...]
         * No whitespace is allowed between the header field-name and colon.  In
         * the past, differences in the handling of such whitespace have led to
         * security vulnerabilities in request routing and response handling.  A
         * server MUST reject any received request message that contains
         * whitespace between a header field-name and colon with a response code
         * of 400 (Bad Request).  A proxy MUST remove any such whitespace from a
         * response message before forwarding the message downstream.
         */
        /* (line k[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check sep != k) */
        /* strict: reject; otherwise: trim trailing whitespace from key */
        while (sep[-1] == ' ' || sep[-1] == '\t') {
            if (http_header_strict)
                return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400");
            --sep;
        }

        const int klen = (int)(sep - k);
        if (0 == klen)
            return http_request_header_line_invalid(r, 400, "invalid header key -> 400");
        const enum http_header_e id = http_header_hkey_get(k, klen);

        if (id == HTTP_HEADER_OTHER) {
            /* skip leading run of common chars, then validate remainder */
            int j = 0;
            while (j < klen && (light_isalpha(k[j]) || k[j] == '-'))
                ++j;
            if (j < klen
                && 0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict))
                return 400;
        }

        /* remove leading whitespace from value */
        for (; *v == ' ' || *v == '\t'; ++v) ;

        /* absorb continuation lines (those beginning with SP or HTAB);
         * i advances past each line that is folded into this field */
        while (i+1 <= hoff[0]) {
            eol = ptr + hoff[i+1];
            if (eol[0] != ' ' && eol[0] != '\t') break;

            /* line folding */
          #ifdef __COVERITY__
            force_assert(eol - k >= 2);
          #endif
            if (eol[-2] == '\r')
                eol[-2] = ' ';
            else if (http_header_strict)
                return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
            eol[-1] = ' ';

            ++i;
        }
      #ifdef __COVERITY__
        /*(buf holding k has non-zero request-line, so eol[-2] valid)*/
        force_assert(eol >= k + 2);
      #endif
        if (eol[-2] == '\r')
            --eol;
        else if (http_header_strict)
            return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400");
        /* remove trailing whitespace from value (+ remove '\r\n') */
        /* (line k[-1] is always preceded by a '\n',
         *  including first header after request-line,
         *  so no need to check (eol != k)) */
        --eol;
        while (eol[-1] == ' ' || eol[-1] == '\t')
            --eol;

        const int vlen = (int)(eol - v);
        /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
        if (vlen <= 0) continue; /* ignore header */

        if (http_header_strict) {
            const char * const bad = http_request_check_line_strict(v, vlen);
            if (bad)
                return http_request_header_char_invalid(r, *bad,
                  "invalid character in header -> 400");
        } /* else URI already checked in http_request_parse_reqline() for any '\0' */

        const int rc = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen);
        if (0 != rc) return rc;
    }

    return 0;
}
1210
1211
1212 static int
http_request_parse(request_st * const restrict r,const int scheme_port)1213 http_request_parse (request_st * const restrict r, const int scheme_port)
1214 {
1215 int status = http_request_parse_target(r, scheme_port);
1216 if (0 != status) return status;
1217
1218 /* post-processing */
1219 const unsigned int http_parseopts = r->conf.http_parseopts;
1220
1221 /* check hostname field if it is set */
1222 /*(r->http_host might not be set until after parsing request headers)*/
1223 if (__builtin_expect( (r->http_host != NULL), 1)) {
1224 if (0 != http_request_host_policy(r->http_host,
1225 http_parseopts, scheme_port))
1226 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400");
1227 buffer_copy_buffer(&r->uri.authority, r->http_host);
1228 }
1229 else {
1230 buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN(""));
1231 if (r->http_version >= HTTP_VERSION_1_1)
1232 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400");
1233 }
1234
1235 if (HTTP_VERSION_1_1 != r->http_version
1236 && (r->rqst_htags
1237 & (light_bshift(HTTP_HEADER_UPGRADE)
1238 |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) {
1239 return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400");
1240 }
1241
1242 if (0 == r->reqbody_length) {
1243 /* POST requires Content-Length (or Transfer-Encoding)
1244 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1245 if (HTTP_METHOD_POST == r->http_method
1246 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1247 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411");
1248 }
1249 }
1250 else {
1251 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/
1252 if (-1 == r->reqbody_length
1253 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) {
1254 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
1255 * 3.3.3. Message Body Length
1256 * [...]
1257 * If a message is received with both a Transfer-Encoding and a
1258 * Content-Length header field, the Transfer-Encoding overrides the
1259 * Content-Length. Such a message might indicate an attempt to
1260 * perform request smuggling (Section 9.5) or response splitting
1261 * (Section 9.4) and ought to be handled as an error. A sender MUST
1262 * remove the received Content-Length field prior to forwarding such
1263 * a message downstream.
1264 */
1265 const unsigned int http_header_strict =
1266 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
1267 if (http_header_strict) {
1268 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400");
1269 }
1270 else {
1271 /* ignore Content-Length */
1272 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
1273 }
1274 }
1275 if (http_method_get_or_head(r->http_method)
1276 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
1277 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400");
1278 }
1279 }
1280
1281 return 0;
1282 }
1283
1284
1285 static int
http_request_parse_hoff(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)1286 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1287 {
1288 /*
1289 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
1290 * Header : "^([-a-zA-Z]+): (.+)$"
1291 * End : "^$"
1292 */
1293
1294 int status;
1295 const unsigned int http_parseopts = r->conf.http_parseopts;
1296
1297 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts);
1298 if (0 != status) return status;
1299
1300 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts);
1301 if (0 != status) return status;
1302
1303 return http_request_parse(r, scheme_port);
1304 }
1305
1306
1307 static void
http_request_headers_fin(request_st * const restrict r)1308 http_request_headers_fin (request_st * const restrict r)
1309 {
1310 if (0 == r->http_status) {
1311 #if 0
1312 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
1313 | (1 << COMP_HTTP_SCHEME)
1314 | (1 << COMP_HTTP_HOST)
1315 | (1 << COMP_HTTP_REMOTE_IP)
1316 | (1 << COMP_HTTP_REQUEST_METHOD)
1317 | (1 << COMP_HTTP_URL)
1318 | (1 << COMP_HTTP_QUERY_STRING)
1319 | (1 << COMP_HTTP_REQUEST_HEADER);
1320 #else
1321 /* all config conditions are valid after parsing header
1322 * (set all bits; remove dependency on plugin_config.h) */
1323 r->conditional_is_valid = ~0u;
1324 #endif
1325 }
1326 else {
1327 r->keep_alive = 0;
1328 r->reqbody_length = 0;
1329 }
1330 }
1331
1332
1333 void
http_request_headers_process(request_st * const restrict r,char * const restrict hdrs,const unsigned short * const restrict hoff,const int scheme_port)1334 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port)
1335 {
1336 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port);
1337
1338 http_request_headers_fin(r);
1339
1340 if (__builtin_expect( (0 != r->http_status), 0)) {
1341 if (r->conf.log_request_header_on_error) {
1342 /*(http_request_parse_headers() modifies hdrs only to
1343 * undo line-wrapping in-place using spaces)*/
1344 log_error_multiline(r->conf.errh, __FILE__, __LINE__,
1345 hdrs, r->rqst_header_len, "rqst: ");
1346 }
1347 }
1348 }
1349
1350
1351 void
http_request_headers_process_h2(request_st * const restrict r,const int scheme_port)1352 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port)
1353 {
1354 if (0 == r->http_status)
1355 r->http_status = http_request_parse(r, scheme_port);
1356
1357 if (0 == r->http_status) {
1358 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION))
1359 r->http_status = http_request_header_line_invalid(r, 400,
1360 "invalid Connection header with HTTP/2 -> 400");
1361 }
1362
1363 http_request_headers_fin(r);
1364
1365 /* limited; headers not collected into a single buf for HTTP/2 */
1366 if (__builtin_expect( (0 != r->http_status), 0)) {
1367 if (r->conf.log_request_header_on_error) {
1368 log_error(r->conf.errh, __FILE__, __LINE__,
1369 "request-header:\n:authority: %s\n:method: %s\n:path: %s",
1370 r->http_host ? r->http_host->ptr : "",
1371 http_method_buf(r->http_method)->ptr,
1372 !buffer_is_blank(&r->target) ? r->target.ptr : "");
1373 }
1374 }
1375
1376 /* ignore Upgrade if using HTTP/2 */
1377 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE))
1378 http_header_request_unset(r, HTTP_HEADER_UPGRADE,
1379 CONST_STR_LEN("upgrade"));
1380 /* XXX: should filter out other hop-by-hop connection headers, too */
1381 }
1382