1 /* 2 * request - HTTP request processing 3 * 4 * Fully-rewritten from original EXCEPT for request_check_hostname() 5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved 6 * License: BSD 3-clause (same as lighttpd) 7 */ 8 #include "first.h" 9 10 #include "request.h" 11 #include "burl.h" 12 #include "http_header.h" 13 #include "http_kv.h" 14 #include "log.h" 15 #include "sock_addr.h" 16 17 #include <limits.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 static int request_check_hostname(buffer * const host) { 23 enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL; 24 size_t i; 25 int label_len = 0; 26 size_t host_len, hostport_len; 27 char *colon; 28 int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */ 29 int level = 0; 30 31 /* 32 * hostport = host [ ":" port ] 33 * host = hostname | IPv4address | IPv6address 34 * hostname = *( domainlabel "." ) toplabel [ "." ] 35 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 36 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 37 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 38 * IPv6address = "[" ... "]" 39 * port = *digit 40 */ 41 42 /* IPv6 address */ 43 if (host->ptr[0] == '[') { 44 char *c = host->ptr + 1; 45 int colon_cnt = 0; 46 47 /* check the address inside [...] */ 48 for (; *c && *c != ']'; c++) { 49 if (*c == ':') { 50 if (++colon_cnt > 7) { 51 return -1; 52 } 53 } else if (!light_isxdigit(*c) && '.' != *c) { 54 return -1; 55 } 56 } 57 58 /* missing ] */ 59 if (!*c) { 60 return -1; 61 } 62 63 /* check port */ 64 if (*(c+1) == ':') { 65 for (c += 2; *c; c++) { 66 if (!light_isdigit(*c)) { 67 return -1; 68 } 69 } 70 } 71 else if ('\0' != *(c+1)) { 72 /* only a port is allowed to follow [...] */ 73 return -1; 74 } 75 return 0; 76 } 77 78 hostport_len = host_len = buffer_string_length(host); 79 80 if (NULL != (colon = memchr(host->ptr, ':', host_len))) { 81 char *c = colon + 1; 82 83 /* check portnumber */ 84 for (; *c; c++) { 85 if (!light_isdigit(*c)) return -1; 86 } 87 88 /* remove the port from the host-len */ 89 host_len = colon - host->ptr; 90 } 91 92 /* Host is empty */ 93 if (host_len == 0) return -1; 94 95 /* if the hostname ends in a "." strip it */ 96 if (host->ptr[host_len-1] == '.') { 97 /* shift port info one left */ 98 if (NULL != colon) memmove(colon-1, colon, hostport_len - host_len); 99 buffer_string_set_length(host, --hostport_len); 100 if (--host_len == 0) return -1; 101 } 102 103 104 /* scan from the right and skip the \0 */ 105 for (i = host_len; i-- > 0; ) { 106 const char c = host->ptr[i]; 107 108 switch (stage) { 109 case TOPLABEL: 110 if (c == '.') { 111 /* only switch stage, if this is not the last character */ 112 if (i != host_len - 1) { 113 if (label_len == 0) { 114 return -1; 115 } 116 117 /* check the first character at right of the dot */ 118 if (is_ip == 0) { 119 if (!light_isalnum(host->ptr[i+1])) { 120 return -1; 121 } 122 } else if (!light_isdigit(host->ptr[i+1])) { 123 is_ip = 0; 124 } else if ('-' == host->ptr[i+1]) { 125 return -1; 126 } else { 127 /* just digits */ 128 is_ip = 1; 129 } 130 131 stage = DOMAINLABEL; 132 133 label_len = 0; 134 level++; 135 } else if (i == 0) { 136 /* just a dot and nothing else is evil */ 137 return -1; 138 } 139 } else if (i == 0) { 140 /* the first character of the hostname */ 141 if (!light_isalnum(c)) { 142 return -1; 143 } 144 label_len++; 145 } else { 146 if (c != '-' && !light_isalnum(c)) { 147 return -1; 148 } 149 if (is_ip == -1) { 150 if (!light_isdigit(c)) is_ip = 0; 151 } 152 label_len++; 153 } 154 155 break; 156 case DOMAINLABEL: 157 if (is_ip == 1) { 158 if (c == '.') { 159 if (label_len == 0) { 160 return -1; 161 } 162 163 label_len = 0; 164 level++; 165 } else if (!light_isdigit(c)) { 166 return -1; 167 } else { 168 label_len++; 169 } 170 } else { 171 if (c == '.') { 172 if (label_len == 0) { 173 return -1; 174 } 175 176 /* c is either - or alphanum here */ 177 if ('-' == host->ptr[i+1]) { 178 return -1; 179 } 180 181 label_len = 0; 182 level++; 183 } else if (i == 0) { 184 if (!light_isalnum(c)) { 185 return -1; 186 } 187 label_len++; 188 } else { 189 if (c != '-' && !light_isalnum(c)) { 190 return -1; 191 } 192 label_len++; 193 } 194 } 195 196 break; 197 } 198 } 199 200 /* a IP has to consist of 4 parts */ 201 if (is_ip == 1 && level != 3) { 202 return -1; 203 } 204 205 if (label_len == 0) { 206 return -1; 207 } 208 209 return 0; 210 } 211 212 int http_request_host_normalize(buffer * const b, const int scheme_port) { 213 /* 214 * check for and canonicalize numeric IP address and portnum (optional) 215 * (IP address may be followed by ":portnum" (optional)) 216 * - IPv6: "[...]" 217 * - IPv4: "x.x.x.x" 218 * - IPv4: 12345678 (32-bit decimal number) 219 * - IPv4: 012345678 (32-bit octal number) 220 * - IPv4: 0x12345678 (32-bit hex number) 221 * 222 * allow any chars (except ':' and '\0' and stray '[' or ']') 223 * (other code may check chars more strictly or more pedantically) 224 * ':' delimits (optional) port at end of string 225 * "[]" wraps IPv6 address literal 226 * '\0' should have been rejected earlier were it present 227 * 228 * any chars includes, but is not limited to: 229 * - allow '-' any where, even at beginning of word 230 * (security caution: might be confused for cmd flag if passed to shell) 231 * - allow all-digit TLDs 232 * (might be mistaken for IPv4 addr by inet_aton() 233 * unless non-digits appear in subdomain) 234 */ 235 236 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 237 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. 238 * Not using inet_pton() (when available) on IPv4 for similar reasons. */ 239 240 const char * const p = b->ptr; 241 const size_t blen = buffer_string_length(b); 242 long port = 0; 243 244 if (*p != '[') { 245 char * const colon = (char *)memchr(p, ':', blen); 246 if (colon) { 247 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ 248 if (colon[1] != '\0') { 249 char *e; 250 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ 251 if (0 < port && port <= USHRT_MAX && *e == '\0') { 252 /* valid port */ 253 } else { 254 return -1; 255 } 256 } /*(else ignore stray colon at string end)*/ 257 buffer_string_set_length(b, (size_t)(colon - p)); /*(remove port str)*/ 258 } 259 260 if (light_isdigit(*p)) do { 261 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ 262 /* (check one-element cache of normalized IPv4 address string) */ 263 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr; 264 size_t n = colon ? (size_t)(colon - p) : blen; 265 sock_addr addr; 266 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break; 267 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) { 268 sock_addr_inet_ntop_copy_buffer(b, &addr); 269 n = buffer_string_length(b); 270 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n)); 271 } 272 } while (0); 273 } else do { /* IPv6 addr */ 274 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 275 276 /* (check one-element cache of normalized IPv4 address string) */ 277 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr; 278 sock_addr addr; 279 char *bracket = b->ptr+blen-1; 280 char *percent = strchr(b->ptr+1, '%'); 281 size_t len; 282 int rc; 283 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/ 284 if (blen <= 2) return -1; /*(invalid "[]")*/ 285 if (*bracket != ']') { 286 bracket = (char *)memchr(b->ptr+1, ']', blen-1); 287 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ 288 return -1; 289 } 290 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ 291 char *e; 292 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ 293 if (0 < port && port <= USHRT_MAX && *e == '\0') { 294 /* valid port */ 295 } else { 296 return -1; 297 } 298 } 299 } 300 301 len = (size_t)((percent ? percent : bracket) - (b->ptr+1)); 302 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) { 303 /* truncate after ']' and re-add normalized port, if needed */ 304 buffer_string_set_length(b, (size_t)(bracket - b->ptr + 1)); 305 break; 306 } 307 308 *bracket = '\0';/*(terminate IPv6 string)*/ 309 if (percent) *percent = '\0'; /*(remove %interface from address)*/ 310 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0); 311 if (percent) *percent = '%'; /*(restore %interface)*/ 312 *bracket = ']'; /*(restore bracket)*/ 313 if (1 != rc) return -1; 314 315 sock_addr_inet_ntop(&addr, buf, sizeof(buf)); 316 len = strlen(buf); 317 if (percent) { 318 if (percent > bracket) return -1; 319 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1; 320 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len)); 321 memcpy(buf+len, percent, (size_t)(bracket - percent)); 322 len += (size_t)(bracket - percent); 323 } 324 buffer_string_set_length(b, 1); /* truncate after '[' */ 325 buffer_append_str2(b, buf, len, CONST_STR_LEN("]")); 326 327 #else 328 329 return -1; 330 331 #endif 332 } while (0); 333 334 if (0 != port && port != scheme_port) { 335 buffer_append_string_len(b, CONST_STR_LEN(":")); 336 buffer_append_int(b, (int)port); 337 } 338 339 return 0; 340 } 341 342 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) { 343 return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT) 344 && 0 != request_check_hostname(b)) 345 || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) 346 && 0 != http_request_host_normalize(b, scheme_port))); 347 } 348 349 __attribute_const__ 350 static int request_uri_is_valid_char(const unsigned char c) { 351 return (c > 32 && c != 127 && c != 255); 352 } 353 354 __attribute_cold__ 355 __attribute_noinline__ 356 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) { 357 if (r->conf.log_request_header_on_error) { 358 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg); 359 } 360 return status; 361 } 362 363 __attribute_cold__ 364 __attribute_noinline__ 365 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) { 366 if (r->conf.log_request_header_on_error) { 367 if ((unsigned char)ch > 32 && ch != 127) { 368 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch); 369 } 370 else { 371 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch); 372 } 373 } 374 return 400; 375 } 376 377 378 int64_t 379 li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err) 380 { 381 /* base 10 strtoll() parsing exactly vlen chars and requiring digits 0-9 */ 382 /* rejects negative numbers and considers values > INT64_MAX an error */ 383 /* note: errno is not set; detect error if *err != v+vlen upon return */ 384 /*(caller must check 0 == vlen if that is to be an error for caller)*/ 385 int64_t rv = 0; 386 uint32_t i; 387 for (i = 0; i < vlen; ++i) { 388 const uint8_t c = ((uint8_t *)v)[i] - '0'; /*(unsigned; underflow ok)*/ 389 if (c > 9) break; 390 if (rv > INT64_MAX/10) break; 391 rv *= 10; 392 if (rv > INT64_MAX - c) break; 393 rv += c; 394 } 395 *err = v+i; 396 return rv; 397 } 398 399 400 /* add header to list of headers 401 * certain headers are also parsed 402 * might drop a header if deemed unnecessary/broken 403 * 404 * returns 0 on success, HTTP status on error 405 */ 406 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 407 buffer **saveb = NULL; 408 409 /* 410 * Note: k might not be '\0'-terminated 411 * Note: v is not '\0'-terminated 412 * With lighttpd HTTP/1.1 parser, v ends with whitespace 413 * (one of '\r' '\n' ' ' '\t') 414 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen 415 * (care must be taken to avoid libc funcs which expect z-strings) 416 */ 417 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/ 418 419 switch (id) { 420 /*case HTTP_HEADER_OTHER:*/ 421 default: 422 break; 423 case HTTP_HEADER_HOST: 424 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) { 425 saveb = &r->http_host; 426 if (vlen >= 1024) { /*(expecting < 256)*/ 427 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400"); 428 } 429 } 430 else if (NULL != r->http_host 431 && buffer_is_equal_string(r->http_host, v, vlen)) { 432 /* ignore all Host: headers if match authority in request line */ 433 return 0; /* ignore header */ 434 } 435 else { 436 return http_request_header_line_invalid(r, 400, "duplicate Host header -> 400"); 437 } 438 break; 439 case HTTP_HEADER_CONNECTION: 440 /* "Connection: close" is common case if header is present */ 441 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close"))) 442 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) { 443 r->keep_alive = 0; 444 break; 445 } 446 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){ 447 r->keep_alive = 1; 448 break; 449 } 450 break; 451 case HTTP_HEADER_CONTENT_TYPE: 452 if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) { 453 return http_request_header_line_invalid(r, 400, "duplicate Content-Type header -> 400"); 454 } 455 break; 456 case HTTP_HEADER_IF_NONE_MATCH: 457 /* if dup, only the first one will survive */ 458 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) { 459 return 0; /* ignore header */ 460 } 461 break; 462 case HTTP_HEADER_CONTENT_LENGTH: 463 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 464 /*(trailing whitespace was removed from vlen)*/ 465 /*(not using strtoll() since v might not be z-string)*/ 466 const char *err; 467 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err); 468 if (err == v+vlen) { 469 /* (set only if not set to -1 by Transfer-Encoding: chunked) */ 470 if (0 == r->reqbody_length) r->reqbody_length = clen; 471 } 472 else { 473 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400"); 474 } 475 } 476 else { 477 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400"); 478 } 479 break; 480 case HTTP_HEADER_HTTP2_SETTINGS: 481 if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) { 482 return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400"); 483 } 484 break; 485 case HTTP_HEADER_IF_MODIFIED_SINCE: 486 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) { 487 /* Proxies sometimes send dup headers 488 * if they are the same we ignore the second 489 * if not, we raise an error */ 490 const buffer *vb = 491 http_header_request_get(r, HTTP_HEADER_IF_MODIFIED_SINCE, 492 CONST_STR_LEN("If-Modified-Since")); 493 if (vb && buffer_eq_icase_slen(vb, v, vlen)) { 494 /* ignore it if they are the same */ 495 return 0; /* ignore header */ 496 } 497 else { 498 return http_request_header_line_invalid(r, 400, "duplicate If-Modified-Since header -> 400"); 499 } 500 } 501 break; 502 case HTTP_HEADER_TRANSFER_ENCODING: 503 if (HTTP_VERSION_1_1 != r->http_version) { 504 return http_request_header_line_invalid(r, 400, 505 HTTP_VERSION_1_0 == r->http_version 506 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400" 507 : "HTTP/2 with Transfer-Encoding is invalid -> 400"); 508 } 509 510 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) { 511 /* Transfer-Encoding might contain additional encodings, 512 * which are not currently supported by lighttpd */ 513 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */ 514 } 515 r->reqbody_length = -1; 516 517 /* Transfer-Encoding is a hop-by-hop header, 518 * which must not be blindly forwarded to backends */ 519 return 0; /* skip header */ 520 } 521 522 http_header_request_append(r, id, k, klen, v, vlen); 523 524 if (saveb) { 525 *saveb = http_header_request_get(r, id, k, klen); 526 } 527 528 return 0; 529 } 530 531 __attribute_cold__ 532 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) { 533 const char * proto = memchr(ptr, ' ', len); 534 if (NULL == proto) 535 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 536 proto = memchr(proto+1, ' ', len - (proto+1 - ptr)); 537 if (NULL == proto) 538 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 539 ++proto; 540 541 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') { 542 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) { 543 /* length already checked before calling this routine */ 544 /* (len != (size_t)(proto - ptr + 8)) */ 545 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/ 546 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 547 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0; 548 } 549 else 550 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505"); 551 } 552 else 553 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400"); 554 555 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */ 556 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version); 557 558 return 0; 559 } 560 561 __attribute_cold__ 562 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) { 563 const char *nuri; 564 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7) 565 && NULL != (nuri = memchr(uri + 7, '/', len-7))) 566 || 567 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8) 568 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) { 569 const char * const host = uri + (uri[4] == ':' ? 7 : 8); 570 const size_t hostlen = nuri - host; 571 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/ 572 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400"); 573 return NULL; 574 } 575 /* Insert as host header */ 576 http_header_request_set(r, HTTP_HEADER_HOST, CONST_STR_LEN("Host"), host, hostlen); 577 r->http_host = http_header_request_get(r, HTTP_HEADER_HOST, CONST_STR_LEN("Host")); 578 return nuri; 579 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/ 580 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0]))) 581 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) { 582 /* (permitted) */ 583 return uri; 584 } else { 585 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400"); 586 return NULL; 587 } 588 } 589 590 591 __attribute_cold__ 592 __attribute_noinline__ 593 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict); 594 595 596 int 597 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts) 598 { 599 /* :method is required to indicate method 600 * CONNECT method must have :method and :authority 601 * All other methods must have at least :method :scheme :path */ 602 603 if (HTTP_METHOD_UNSET == r->http_method) 604 return http_request_header_line_invalid(r, 400, 605 "missing pseudo-header method -> 400"); 606 607 if (HTTP_METHOD_CONNECT != r->http_method) { 608 if (!scheme) 609 return http_request_header_line_invalid(r, 400, 610 "missing pseudo-header scheme -> 400"); 611 612 if (buffer_string_is_empty(&r->target)) 613 return http_request_header_line_invalid(r, 400, 614 "missing pseudo-header path -> 400"); 615 616 const char * const uri = r->target.ptr; 617 if (*uri != '/') { /* (common case: (*uri == '/')) */ 618 if (uri[0] != '*' || uri[1] != '\0' 619 || HTTP_METHOD_OPTIONS != r->http_method) 620 return http_request_header_line_invalid(r, 400, 621 "invalid pseudo-header path -> 400"); 622 } 623 } 624 else { /* HTTP_METHOD_CONNECT */ 625 if (NULL == r->http_host) 626 return http_request_header_line_invalid(r, 400, 627 "missing pseudo-header authority -> 400"); 628 if (!buffer_string_is_empty(&r->target) || scheme) 629 return http_request_header_line_invalid(r, 400, 630 "invalid pseudo-header with CONNECT -> 400"); 631 /*(reuse uri and ulen to assign to r->target)*/ 632 buffer_copy_buffer(&r->target, r->http_host); 633 } 634 buffer_copy_buffer(&r->target_orig, &r->target); 635 636 /* r->http_host, if set, is checked with http_request_host_policy() 637 * in http_request_parse() */ 638 639 /* copied and modified from end of http_request_parse_reqline() */ 640 641 /* check uri for invalid characters */ 642 const unsigned int http_header_strict = 643 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 644 if (http_header_strict 645 && (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) 646 return 0; /* URI will be checked in http_request_parse_target() */ 647 648 const uint32_t ulen = buffer_string_length(&r->target); 649 const uint8_t * const uri = (uint8_t *)r->target.ptr; 650 if (http_header_strict) { 651 for (uint32_t i = 0; i < ulen; ++i) { 652 if (!request_uri_is_valid_char(uri[i])) 653 return http_request_header_char_invalid(r, uri[i], 654 "invalid character in URI -> 400"); 655 } 656 } 657 else { 658 if (NULL != memchr(uri, '\0', ulen)) 659 return http_request_header_char_invalid(r, '\0', 660 "invalid character in header -> 400"); 661 } 662 663 return 0; 664 } 665 666 667 int 668 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx) 669 { 670 /* Note: k and v might not be '\0' terminated strings; 671 * care must be taken to avoid libc funcs which expect z-strings */ 672 const char * const restrict k = hpctx->k; 673 const char * const restrict v = hpctx->v; 674 const uint32_t klen = hpctx->klen; 675 const uint32_t vlen = hpctx->vlen; 676 677 if (0 == klen) 678 return http_request_header_line_invalid(r, 400, 679 "invalid header key -> 400"); 680 if (0 == vlen) 681 return http_request_header_line_invalid(r, 400, 682 "invalid header value -> 400"); 683 684 if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) { 685 /*(configurable with server.max-request-field-size; default 8k)*/ 686 #if 1 /* emit to error log for people sending large headers */ 687 log_error(r->conf.errh, __FILE__, __LINE__, 688 "oversized request header -> 431"); 689 return 431; /* Request Header Fields Too Large */ 690 #else 691 /* 431 Request Header Fields Too Large */ 692 return http_request_header_line_invalid(r, 431, 693 "oversized request header -> 431"); 694 #endif 695 } 696 697 if (2 == klen && k[0] == 't' && k[1] == 'e' 698 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers"))) 699 return http_request_header_line_invalid(r, 400, 700 "invalid TE header value with HTTP/2 -> 400"); 701 702 if (!hpctx->trailers) { 703 if (*k == ':') { 704 /* HTTP/2 request pseudo-header fields */ 705 if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/ 706 return http_request_header_line_invalid(r, 400, 707 "invalid pseudo-header -> 400"); 708 switch (klen-1) { 709 case 4: 710 if (0 == memcmp(k+1, "path", 4)) { 711 if (!buffer_string_is_empty(&r->target)) 712 return http_request_header_line_invalid(r, 400, 713 "repeated pseudo-header -> 400"); 714 buffer_copy_string_len(&r->target, v, vlen); 715 return 0; 716 } 717 break; 718 case 6: 719 if (0 == memcmp(k+1, "method", 6)) { 720 if (HTTP_METHOD_UNSET != r->http_method) 721 return http_request_header_line_invalid(r, 400, 722 "repeated pseudo-header -> 400"); 723 r->http_method = get_http_method_key(v, vlen); 724 if (HTTP_METHOD_UNSET >= r->http_method) 725 return http_request_header_line_invalid(r, 501, 726 "unknown http-method -> 501"); 727 return 0; 728 } 729 else if (0 == memcmp(k+1, "scheme", 6)) { 730 if (hpctx->scheme) 731 return http_request_header_line_invalid(r, 400, 732 "repeated pseudo-header -> 400"); 733 switch (vlen) {/*(validated, but then ignored)*/ 734 case 5: /* "https" */ 735 if (v[4]!='s') break; 736 __attribute_fallthrough__ 737 case 4: /* "http" */ 738 if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') { 739 hpctx->scheme = 1; 740 return 0; 741 } 742 break; 743 default: 744 break; 745 } 746 return http_request_header_line_invalid(r, 400, 747 "unknown pseudo-header scheme -> 400"); 748 } 749 break; 750 case 9: 751 if (0 == memcmp(k+1, "authority", 9)) { 752 if (r->http_host) 753 return http_request_header_line_invalid(r, 400, 754 "repeated pseudo-header -> 400"); 755 if (vlen >= 1024) /*(expecting < 256)*/ 756 return http_request_header_line_invalid(r, 400, 757 "invalid pseudo-header authority too long -> 400"); 758 /* insert as host header */ 759 http_header_request_set(r, HTTP_HEADER_HOST, 760 CONST_STR_LEN("host"), v, vlen); 761 r->http_host = 762 http_header_request_get(r, HTTP_HEADER_HOST, 763 CONST_STR_LEN("Host")); 764 return 0; 765 } 766 break; 767 default: 768 break; 769 } 770 return http_request_header_line_invalid(r, 400, 771 "invalid pseudo-header -> 400"); 772 } 773 else { /*(non-pseudo headers)*/ 774 if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/ 775 hpctx->pseudo = 0; 776 int status = 777 http_request_validate_pseudohdrs(r, hpctx->scheme, 778 hpctx->http_parseopts); 779 if (0 != status) return status; 780 } 781 782 const unsigned int http_header_strict = 783 (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 784 785 for (uint32_t j = 0; j < klen; ++j) { 786 if (light_islower(k[j]) || k[j] == '-') 787 continue; /*(common cases)*/ 788 if (light_isupper(k[j])) 789 return 400; 790 if (0 != http_request_parse_header_other(r, k+j, klen-j, 791 http_header_strict)) 792 return 400; 793 break; 794 } 795 796 if (http_header_strict) { 797 for (uint32_t j = 0; j < vlen; ++j) { 798 if ((((uint8_t *)v)[j] < 32 && v[j] != '\t') || v[j]==127) 799 return http_request_header_char_invalid(r, v[j], 800 "invalid character in header -> 400"); 801 } 802 } 803 else { 804 if (NULL != memchr(v, '\0', vlen)) 805 return http_request_header_char_invalid(r, '\0', 806 "invalid character in header -> 400"); 807 } 808 809 const enum http_header_e id = 810 hpctx->id ? hpctx->id : http_header_hkey_get_lc(k, klen); 811 return http_request_parse_single_header(r, id, k, klen, v, vlen); 812 } 813 } 814 else { /*(trailers)*/ 815 /* ignore trailers (after required HPACK decoding) if streaming 816 * request body to backend since headers have already been sent 817 * to backend via Common Gateway Interface (CGI) (CGI, FastCGI, 818 * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently 819 * support using HTTP/2 to connect to backends) */ 820 #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/ 821 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST) 822 return 0; 823 #endif 824 /* Note: do not unconditionally merge into headers since if 825 * headers had already been sent to backend, then mod_accesslog 826 * logging of request headers might be inaccurate. 827 * Many simple backends do not support HTTP/1.1 requests sending 828 * Transfer-Encoding: chunked, and even those that do might not 829 * handle trailers. Some backends do not even support HTTP/1.1. 830 * For all these reasons, ignore trailers if streaming request 831 * body to backend. Revisit in future if adding support for 832 * connecting to backends using HTTP/2 (with explicit config 833 * option to force connecting to backends using HTTP/2) */ 834 835 /* XXX: TODO: request trailers not handled if streaming reqbody 836 * XXX: must ensure that trailers are not disallowed field-names 837 */ 838 839 return 0; 840 } 841 } 842 843 844 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 845 size_t len = hoff[2]; 846 847 /* parse the first line of the request 848 * <method> <uri> <protocol>\r\n 849 * */ 850 if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */ 851 return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400"); 852 if (ptr[len-2] == '\r') 853 len-=2; 854 else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/ 855 len-=1; 856 else 857 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 858 859 /* 860 * RFC7230: 861 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT 862 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 863 */ 864 865 /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */ 866 union proto_un { 867 char c[8]; 868 uint64_t u; 869 }; 870 static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}}; 871 static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}}; 872 const char *p = ptr + len - 8; 873 union proto_un proto8; 874 proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3]; 875 proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7]; 876 if (p[-1] == ' ' && http_1_1.u == proto8.u) { 877 r->http_version = HTTP_VERSION_1_1; 878 r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */ 879 } 880 else if (p[-1] == ' ' && http_1_0.u == proto8.u) { 881 r->http_version = HTTP_VERSION_1_0; 882 r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */ 883 } 884 else { 885 int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts); 886 if (0 != status) return status; 887 /*(space char must exist if http_request_parse_proto_loose() succeeds)*/ 888 for (p = ptr + len - 9; p[-1] != ' '; --p) ; 889 } 890 891 /* method is expected to be a short string in the general case */ 892 size_t i = 0; 893 while (ptr[i] != ' ') ++i; 894 #if 0 /*(space must exist if protocol was parsed successfully)*/ 895 while (i < len && ptr[i] != ' ') ++i; 896 if (ptr[i] != ' ') 897 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 898 #endif 899 900 r->http_method = get_http_method_key(ptr, i); 901 if (HTTP_METHOD_UNSET >= r->http_method) 902 return http_request_header_line_invalid(r, 501, "unknown http-method -> 501"); 903 904 const char *uri = ptr + i + 1; 905 906 if (uri == p) 907 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 908 len = (size_t)(p - uri - 1); 909 910 if (*uri != '/') { /* (common case: (*uri == '/')) */ 911 uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts); 912 if (NULL == uri) return 400; 913 len = (size_t)(p - uri - 1); 914 } 915 916 if (0 == len) 917 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 918 919 /* check uri for invalid characters */ 920 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) { /* http_header_strict */ 921 if ((http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) { 922 /* URI will be checked in http_request_parse_target() */ 923 } 924 else { 925 for (i = 0; i < len; ++i) { 926 if (!request_uri_is_valid_char(uri[i])) 927 return http_request_header_char_invalid(r, uri[i], "invalid character in URI -> 400"); 928 } 929 } 930 } 931 else { 932 /* check entire set of request headers for '\0' */ 933 if (NULL != memchr(ptr, '\0', hoff[hoff[0]])) 934 return http_request_header_char_invalid(r, '\0', "invalid character in header -> 400"); 935 } 936 937 buffer_copy_string_len(&r->target, uri, len); 938 buffer_copy_string_len(&r->target_orig, uri, len); 939 return 0; 940 } 941 942 int http_request_parse_target(request_st * const r, int scheme_port) { 943 /* URI is parsed into components at start of request and may 944 * also be re-parsed upon HANDLER_COMEBACK during the request 945 * r->target is expected to be a "/url-part?query-part" 946 * (and *not* a fully-qualified URI starting https://...) 947 * r->uri.authority is expected to be parsed elsewhere into r->http_host 948 */ 949 950 /** 951 * prepare strings 952 * 953 * - uri.path 954 * - uri.query 955 * 956 */ 957 958 /** 959 * Name according to RFC 2396 960 * 961 * - scheme 962 * - authority 963 * - path 964 * - query 965 * 966 * (scheme)://(authority)(path)?(query)#fragment 967 * 968 */ 969 970 /* take initial scheme value from connection-level state 971 * (request r->uri.scheme can be overwritten for later, 972 * for example by mod_extforward or mod_magnet) */ 973 buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4); 974 975 buffer * const target = &r->target; 976 if (r->http_method == HTTP_METHOD_CONNECT 977 || (r->http_method == HTTP_METHOD_OPTIONS 978 && target->ptr[0] == '*' 979 && target->ptr[1] == '\0')) { 980 /* CONNECT ... (or) OPTIONS * ... */ 981 buffer_copy_buffer(&r->uri.path, target); 982 buffer_clear(&r->uri.query); 983 return 0; 984 } 985 986 char *qstr; 987 if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { 988 /*uint32_t len = (uint32_t)buffer_string_length(target);*/ 989 int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts); 990 if (-2 == qs) 991 return http_request_header_line_invalid(r, 400, 992 "invalid character in URI -> 400"); /* Bad Request */ 993 qstr = (-1 == qs) ? NULL : target->ptr+qs; 994 #if 0 /* future: might enable here, or below for all requests */ 995 /* (Note: total header size not recalculated on HANDLER_COMEBACK 996 * even if other request headers changed during processing) 997 * (If (0 != r->loops_per_request), then the generated 998 * request is too large. Should a different error be returned?) */ 999 r->rqst_header_len -= len; 1000 len = buffer_string_length(target); 1001 r->rqst_header_len += len; 1002 if (len > MAX_HTTP_REQUEST_URI) { 1003 return 414; /* 414 URI Too Long */ 1004 } 1005 if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) { 1006 log_error(r->conf.errh, __FILE__, __LINE__, 1007 "request header fields too large: %u -> 431", 1008 r->rqst_header_len); 1009 return 431; /* Request Header Fields Too Large */ 1010 } 1011 #endif 1012 } 1013 else { 1014 size_t rlen = buffer_string_length(target); 1015 qstr = memchr(target->ptr, '#', rlen);/* discard fragment */ 1016 if (qstr) { 1017 rlen = (size_t)(qstr - target->ptr); 1018 buffer_string_set_length(target, rlen); 1019 } 1020 qstr = memchr(target->ptr, '?', rlen); 1021 } 1022 1023 /** extract query string from target */ 1024 const char * const pstr = target->ptr; 1025 const uint32_t rlen = buffer_string_length(target); 1026 uint32_t plen; 1027 if (NULL != qstr) { 1028 plen = (uint32_t)(qstr - pstr); 1029 buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1); 1030 } 1031 else { 1032 plen = rlen; 1033 buffer_clear(&r->uri.query); 1034 } 1035 buffer_copy_string_len(&r->uri.path, pstr, plen); 1036 1037 /* decode url to path 1038 * 1039 * - decode url-encodings (e.g. %20 -> ' ') 1040 * - remove path-modifiers (e.g. /../) 1041 */ 1042 1043 buffer_urldecode_path(&r->uri.path); 1044 buffer_path_simplify(&r->uri.path, &r->uri.path); 1045 if (r->uri.path.ptr[0] != '/') 1046 return http_request_header_line_invalid(r, 400, 1047 "uri-path does not begin with '/' -> 400"); /* Bad Request */ 1048 1049 return 0; 1050 } 1051 1052 __attribute_cold__ 1053 __attribute_noinline__ 1054 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) { 1055 for (int i = 0; i < klen; ++i) { 1056 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/ 1057 /** 1058 * 1*<any CHAR except CTLs or separators> 1059 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127) 1060 * 1061 */ 1062 switch(k[i]) { 1063 case ' ': 1064 case '\t': 1065 return http_request_header_line_invalid(r, 400, "WS character in key -> 400"); 1066 case '(': 1067 case ')': 1068 case '<': 1069 case '>': 1070 case '@': 1071 case ',': 1072 case ';': 1073 case '\\': 1074 case '\"': 1075 case '/': 1076 case '[': 1077 case ']': 1078 case '?': 1079 case '=': 1080 case '{': 1081 case '}': 1082 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1083 default: 1084 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0') 1085 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1086 break; /* ok */ 1087 } 1088 } 1089 return 0; 1090 } 1091 1092 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 1093 const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1094 1095 #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/ 1096 int i = hoff[2]; 1097 1098 if (ptr[i] == ' ' || ptr[i] == '\t') { 1099 return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400"); 1100 } 1101 #endif 1102 1103 for (int i = 2; i < hoff[0]; ++i) { 1104 const char *k = ptr + hoff[i]; 1105 /* one past last line hoff[hoff[0]] is to final "\r\n" */ 1106 char *end = ptr + hoff[i+1]; 1107 1108 const char *colon = memchr(k, ':', end - k); 1109 if (NULL == colon) 1110 return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400"); 1111 1112 const char *v = colon + 1; 1113 1114 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1115 * 3.2.4. Field Parsing 1116 * [...] 1117 * No whitespace is allowed between the header field-name and colon. In 1118 * the past, differences in the handling of such whitespace have led to 1119 * security vulnerabilities in request routing and response handling. A 1120 * server MUST reject any received request message that contains 1121 * whitespace between a header field-name and colon with a response code 1122 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a 1123 * response message before forwarding the message downstream. 1124 */ 1125 /* (line k[-1] is always preceded by a '\n', 1126 * including first header after request-line, 1127 * so no need to check colon != k) */ 1128 if (colon[-1] == ' ' || colon[-1] == '\t') { 1129 if (http_header_strict) { 1130 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400"); 1131 } 1132 else { 1133 /* remove trailing whitespace from key(if !http_header_strict)*/ 1134 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t'); 1135 } 1136 } 1137 1138 const int klen = (int)(colon - k); 1139 if (0 == klen) 1140 return http_request_header_line_invalid(r, 400, "invalid header key -> 400"); 1141 const enum http_header_e id = http_header_hkey_get(k, klen); 1142 1143 if (id == HTTP_HEADER_OTHER) { 1144 for (int j = 0; j < klen; ++j) { 1145 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/ 1146 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict)) 1147 return 400; 1148 break; 1149 } 1150 } 1151 1152 /* remove leading whitespace from value */ 1153 while (*v == ' ' || *v == '\t') ++v; 1154 1155 for (; i+1 <= hoff[0]; ++i) { 1156 end = ptr + hoff[i+1]; 1157 if (end[0] != ' ' && end[0] != '\t') break; 1158 1159 /* line folding */ 1160 #ifdef __COVERITY__ 1161 force_assert(end - k >= 2); 1162 #endif 1163 if (end[-2] == '\r') 1164 end[-2] = ' '; 1165 else if (http_header_strict) 1166 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1167 end[-1] = ' '; 1168 } 1169 #ifdef __COVERITY__ 1170 /*(buf holding k has non-zero request-line, so end[-2] valid)*/ 1171 force_assert(end >= k + 2); 1172 #endif 1173 if (end[-2] == '\r') 1174 --end; 1175 else if (http_header_strict) 1176 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1177 /* remove trailing whitespace from value (+ remove '\r\n') */ 1178 /* (line k[-1] is always preceded by a '\n', 1179 * including first header after request-line, 1180 * so no need to check (end != k)) */ 1181 do { --end; } while (end[-1] == ' ' || end[-1] == '\t'); 1182 1183 const int vlen = (int)(end - v); 1184 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */ 1185 if (vlen <= 0) continue; /* ignore header */ 1186 1187 if (http_header_strict) { 1188 for (int j = 0; j < vlen; ++j) { 1189 if ((((unsigned char *)v)[j] < 32 && v[j] != '\t') || v[j]==127) 1190 return http_request_header_char_invalid(r, v[j], "invalid character in header -> 400"); 1191 } 1192 } /* else URI already checked in http_request_parse_reqline() for any '\0' */ 1193 1194 int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen); 1195 if (0 != status) return status; 1196 } 1197 1198 return 0; 1199 } 1200 1201 1202 static int 1203 http_request_parse (request_st * const restrict r, const int scheme_port) 1204 { 1205 int status = http_request_parse_target(r, scheme_port); 1206 if (0 != status) return status; 1207 1208 /*(r->http_host might not be set until after parsing request headers)*/ 1209 buffer_copy_buffer(&r->uri.authority, r->http_host);/*(copy even if empty)*/ 1210 buffer_to_lower(&r->uri.authority); 1211 1212 /* post-processing */ 1213 const unsigned int http_parseopts = r->conf.http_parseopts; 1214 1215 /* check hostname field if it is set */ 1216 if (r->http_host) { 1217 if (0 != http_request_host_policy(r->http_host, 1218 http_parseopts, scheme_port)) 1219 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400"); 1220 } 1221 else { 1222 if (r->http_version >= HTTP_VERSION_1_1) 1223 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400"); 1224 } 1225 1226 if (0 == r->reqbody_length) { 1227 /* POST requires Content-Length (or Transfer-Encoding) 1228 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1229 if (HTTP_METHOD_POST == r->http_method 1230 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1231 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411"); 1232 } 1233 } 1234 else { 1235 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1236 if (-1 == r->reqbody_length 1237 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1238 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1239 * 3.3.3. Message Body Length 1240 * [...] 1241 * If a message is received with both a Transfer-Encoding and a 1242 * Content-Length header field, the Transfer-Encoding overrides the 1243 * Content-Length. Such a message might indicate an attempt to 1244 * perform request smuggling (Section 9.5) or response splitting 1245 * (Section 9.4) and ought to be handled as an error. A sender MUST 1246 * remove the received Content-Length field prior to forwarding such 1247 * a message downstream. 1248 */ 1249 const unsigned int http_header_strict = 1250 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1251 if (http_header_strict) { 1252 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400"); 1253 } 1254 else { 1255 /* ignore Content-Length */ 1256 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length")); 1257 } 1258 } 1259 if (http_method_get_or_head(r->http_method) 1260 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) { 1261 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400"); 1262 } 1263 } 1264 1265 return 0; 1266 } 1267 1268 1269 static int 1270 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1271 { 1272 /* 1273 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" 1274 * Header : "^([-a-zA-Z]+): (.+)$" 1275 * End : "^$" 1276 */ 1277 1278 int status; 1279 const unsigned int http_parseopts = r->conf.http_parseopts; 1280 1281 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts); 1282 if (0 != status) return status; 1283 1284 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts); 1285 if (0 != status) return status; 1286 1287 return http_request_parse(r, scheme_port); 1288 } 1289 1290 1291 static void 1292 http_request_headers_fin (request_st * const restrict r) 1293 { 1294 if (0 == r->http_status) { 1295 #if 0 1296 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET) 1297 | (1 << COMP_HTTP_SCHEME) 1298 | (1 << COMP_HTTP_HOST) 1299 | (1 << COMP_HTTP_REMOTE_IP) 1300 | (1 << COMP_HTTP_REQUEST_METHOD) 1301 | (1 << COMP_HTTP_URL) 1302 | (1 << COMP_HTTP_QUERY_STRING) 1303 | (1 << COMP_HTTP_REQUEST_HEADER); 1304 #else 1305 /* all config conditions are valid after parsing header 1306 * (set all bits; remove dependency on plugin_config.h) */ 1307 r->conditional_is_valid = ~0u; 1308 #endif 1309 } 1310 else { 1311 r->keep_alive = 0; 1312 r->reqbody_length = 0; 1313 } 1314 } 1315 1316 1317 void 1318 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1319 { 1320 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port); 1321 1322 http_request_headers_fin(r); 1323 1324 if (0 != r->http_status) { 1325 if (r->conf.log_request_header_on_error) { 1326 /*(http_request_parse_headers() modifies hdrs only to 1327 * undo line-wrapping in-place using spaces)*/ 1328 log_error(r->conf.errh, __FILE__, __LINE__, 1329 "request-header:\n%.*s", (int)r->rqst_header_len, hdrs); 1330 } 1331 } 1332 } 1333 1334 1335 void 1336 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port) 1337 { 1338 if (0 == r->http_status) 1339 r->http_status = http_request_parse(r, scheme_port); 1340 1341 if (0 == r->http_status) { 1342 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION)) 1343 r->http_status = http_request_header_line_invalid(r, 400, 1344 "invalid Connection header with HTTP/2 -> 400"); 1345 } 1346 1347 http_request_headers_fin(r); 1348 1349 /* limited; headers not collected into a single buf for HTTP/2 */ 1350 if (__builtin_expect( (0 != r->http_status), 0)) { 1351 if (r->conf.log_request_header_on_error) { 1352 log_error(r->conf.errh, __FILE__, __LINE__, 1353 "request-header:\n:authority: %s\n:method: %s\n:path: %s", 1354 r->http_host ? r->http_host->ptr : "", 1355 (HTTP_METHOD_UNSET != r->http_method) 1356 ? get_http_method_name(r->http_method) 1357 : "", 1358 !buffer_string_is_empty(&r->target) ? r->target.ptr : ""); 1359 } 1360 } 1361 1362 /* ignore Upgrade if using HTTP/2 */ 1363 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)) 1364 http_header_request_unset(r, HTTP_HEADER_UPGRADE, 1365 CONST_STR_LEN("upgrade")); 1366 /* XXX: should filter out other hop-by-hop connection headers, too */ 1367 } 1368