1 /* 2 * request - HTTP request processing 3 * 4 * Fully-rewritten from original 5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved 6 * License: BSD 3-clause (same as lighttpd) 7 */ 8 #include "first.h" 9 10 #include "request.h" 11 #include "burl.h" 12 #include "http_header.h" 13 #include "http_kv.h" 14 #include "log.h" 15 #include "sock_addr.h" 16 17 #include <limits.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 static int request_check_hostname(buffer * const host) { 23 /* 24 * hostport = host [ ":" port ] 25 * host = hostname | IPv4address | IPv6address 26 * hostname = *( domainlabel "." ) toplabel [ "." ] 27 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 28 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 29 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 30 * IPv6address = "[" ... "]" 31 * port = *digit 32 */ 33 34 const char *h = host->ptr; 35 36 if (*h != '[') { 37 uint32_t len = buffer_clen(host); 38 const char * const colon = memchr(h, ':', len); 39 uint32_t hlen = colon ? (uint32_t)(colon - h) : len; 40 41 /* if hostname ends in ".", strip it */ 42 if (__builtin_expect( (0 == hlen), 0)) return -1; 43 if (__builtin_expect( (h[hlen-1] == '.'), 0)) { 44 /* shift port info one left */ 45 if (--hlen == 0) return -1; 46 --len; 47 if (NULL != colon) 48 memmove(host->ptr+hlen, colon, len - hlen); 49 buffer_truncate(host, len); 50 } 51 52 int label_len = 0; 53 int allnumeric = 1; 54 int numeric = 1; 55 int level = 0; 56 for (uint32_t i = 0; i < hlen; ++i) { 57 const int ch = h[i]; 58 ++label_len; 59 if (light_isdigit(ch)) 60 continue; 61 else if ((light_isalpha(ch) || (ch == '-' && i != 0))) 62 numeric = 0; 63 else if (ch == '.' && 1 != label_len && '-' != h[i+1]) { 64 allnumeric &= numeric; 65 numeric = 1; 66 label_len = 0; 67 ++level; 68 } 69 else 70 return -1; 71 } 72 /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */ 73 if (0 == label_len || (numeric && (level != 3 || !allnumeric))) 74 return -1; 75 76 h += hlen; 77 } 78 else { /* IPv6 address */ 79 /* check the address inside [...]; note: not fully validating */ 80 /* (note: not allowing scoped literals, e.g. %eth0 suffix) */ 81 ++h; /* step past '[' */ 82 int cnt = 0; 83 while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h; 84 /*(invalid char, too many ':', missing ']', or empty "[]")*/ 85 if (*h != ']' || h - host->ptr == 1) return -1; 86 ++h; /* step past ']' */ 87 } 88 89 /* check numerical port, if present */ 90 if (*h == ':') { 91 if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/ 92 buffer_truncate(host, h - host->ptr); 93 do { ++h; } while (light_isdigit(*h)); 94 } 95 96 return (*h == '\0') ? 0 : -1; 97 } 98 99 int http_request_host_normalize(buffer * const b, const int scheme_port) { 100 /* 101 * check for and canonicalize numeric IP address and portnum (optional) 102 * (IP address may be followed by ":portnum" (optional)) 103 * - IPv6: "[...]" 104 * - IPv4: "x.x.x.x" 105 * - IPv4: 12345678 (32-bit decimal number) 106 * - IPv4: 012345678 (32-bit octal number) 107 * - IPv4: 0x12345678 (32-bit hex number) 108 * 109 * allow any chars (except ':' and '\0' and stray '[' or ']') 110 * (other code may check chars more strictly or more pedantically) 111 * ':' delimits (optional) port at end of string 112 * "[]" wraps IPv6 address literal 113 * '\0' should have been rejected earlier were it present 114 * 115 * any chars includes, but is not limited to: 116 * - allow '-' any where, even at beginning of word 117 * (security caution: might be confused for cmd flag if passed to shell) 118 * - allow all-digit TLDs 119 * (might be mistaken for IPv4 addr by inet_aton() 120 * unless non-digits appear in subdomain) 121 */ 122 123 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 124 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. 125 * Not using inet_pton() (when available) on IPv4 for similar reasons. */ 126 127 const char * const p = b->ptr; 128 const size_t blen = buffer_clen(b); 129 long port = 0; 130 131 if (*p != '[') { 132 char * const colon = (char *)memchr(p, ':', blen); 133 if (colon) { 134 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ 135 if (colon[1] != '\0') { 136 char *e; 137 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ 138 if (0 < port && port <= USHRT_MAX && *e == '\0') { 139 /* valid port */ 140 } else { 141 return -1; 142 } 143 } /*(else ignore stray colon at string end)*/ 144 buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/ 145 } 146 147 if (light_isdigit(*p)) do { 148 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ 149 /* (check one-element cache of normalized IPv4 address string) */ 150 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr; 151 size_t n = colon ? (size_t)(colon - p) : blen; 152 sock_addr addr; 153 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break; 154 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) { 155 sock_addr_inet_ntop_copy_buffer(b, &addr); 156 n = buffer_clen(b); 157 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n)); 158 } 159 } while (0); 160 } else do { /* IPv6 addr */ 161 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 162 163 /* (check one-element cache of normalized IPv4 address string) */ 164 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr; 165 sock_addr addr; 166 char *bracket = b->ptr+blen-1; 167 char *percent = strchr(b->ptr+1, '%'); 168 size_t len; 169 int rc; 170 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/ 171 if (blen <= 2) return -1; /*(invalid "[]")*/ 172 if (*bracket != ']') { 173 bracket = (char *)memchr(b->ptr+1, ']', blen-1); 174 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ 175 return -1; 176 } 177 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ 178 char *e; 179 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ 180 if (0 < port && port <= USHRT_MAX && *e == '\0') { 181 /* valid port */ 182 } else { 183 return -1; 184 } 185 } 186 } 187 188 len = (size_t)((percent ? percent : bracket) - (b->ptr+1)); 189 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) { 190 /* truncate after ']' and re-add normalized port, if needed */ 191 buffer_truncate(b, (size_t)(bracket - b->ptr + 1)); 192 break; 193 } 194 195 *bracket = '\0';/*(terminate IPv6 string)*/ 196 if (percent) *percent = '\0'; /*(remove %interface from address)*/ 197 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0); 198 if (percent) *percent = '%'; /*(restore %interface)*/ 199 *bracket = ']'; /*(restore bracket)*/ 200 if (1 != rc) return -1; 201 202 sock_addr_inet_ntop(&addr, buf, sizeof(buf)); 203 len = strlen(buf); 204 if (percent) { 205 if (percent > bracket) return -1; 206 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1; 207 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len)); 208 memcpy(buf+len, percent, (size_t)(bracket - percent)); 209 len += (size_t)(bracket - percent); 210 } 211 buffer_truncate(b, 1); /* truncate after '[' */ 212 buffer_append_str2(b, buf, len, CONST_STR_LEN("]")); 213 214 #else 215 216 return -1; 217 218 #endif 219 } while (0); 220 221 if (0 != port && port != scheme_port) { 222 buffer_append_string_len(b, CONST_STR_LEN(":")); 223 buffer_append_int(b, (int)port); 224 } 225 226 return 0; 227 } 228 229 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) { 230 /* caller should lowercase, as is done in http_request_header_set_Host(), 231 * for consistency in case the value is used prior to calling policy func */ 232 /*buffer_to_lower(b);*/ 233 return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT) 234 && 0 != request_check_hostname(b)) 235 || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) 236 && 0 != http_request_host_normalize(b, scheme_port))); 237 } 238 239 __attribute_const__ 240 static int request_uri_is_valid_char(const unsigned char c) { 241 return (c > 32 && c != 127 && c != 255); 242 } 243 244 __attribute_cold__ 245 __attribute_noinline__ 246 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) { 247 if (r->conf.log_request_header_on_error) { 248 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg); 249 } 250 return status; 251 } 252 253 __attribute_cold__ 254 __attribute_noinline__ 255 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) { 256 if (r->conf.log_request_header_on_error) { 257 if ((unsigned char)ch > 32 && ch != 127) { 258 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch); 259 } 260 else { 261 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch); 262 } 263 } 264 return 400; 265 } 266 267 268 __attribute_noinline__ 269 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen) 270 { 271 r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST, 272 CONST_STR_LEN("Host")); 273 buffer_copy_string_len_lc(r->http_host, h, hlen); 274 } 275 276 277 int64_t 278 li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err) 279 { 280 /* base 10 strtoll() parsing exactly vlen chars and requiring digits 0-9 */ 281 /* rejects negative numbers and considers values > INT64_MAX an error */ 282 /* note: errno is not set; detect error if *err != v+vlen upon return */ 283 /*(caller must check 0 == vlen if that is to be an error for caller)*/ 284 int64_t rv = 0; 285 uint32_t i; 286 for (i = 0; i < vlen; ++i) { 287 const uint8_t c = ((uint8_t *)v)[i] - '0'; /*(unsigned; underflow ok)*/ 288 if (c > 9) break; 289 if (rv > INT64_MAX/10) break; 290 rv *= 10; 291 if (rv > INT64_MAX - c) break; 292 rv += c; 293 } 294 *err = v+i; 295 return rv; 296 } 297 298 299 __attribute_cold__ 300 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 301 /* Proxies sometimes send dup headers 302 * if they are the same we ignore the second 303 * if not, we raise an error */ 304 const buffer * const vb = http_header_request_get(r, id, k, klen); 305 if (vb && buffer_eq_icase_slen(vb, v, vlen)) 306 return 0; /* ignore header; matches existing header */ 307 308 const char *errmsg; 309 switch (id) { 310 case HTTP_HEADER_HOST: 311 errmsg = "duplicate Host header -> 400"; 312 break; 313 case HTTP_HEADER_CONTENT_TYPE: 314 errmsg = "duplicate Content-Type header -> 400"; 315 break; 316 case HTTP_HEADER_IF_MODIFIED_SINCE: 317 errmsg = "duplicate If-Modified-Since header -> 400"; 318 break; 319 default: 320 errmsg = "duplicate header -> 400"; 321 break; 322 } 323 return http_request_header_line_invalid(r, 400, errmsg); 324 } 325 326 327 /* add header to list of headers 328 * certain headers are also parsed 329 * might drop a header if deemed unnecessary/broken 330 * 331 * returns 0 on success, HTTP status on error 332 */ 333 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 334 /* 335 * Note: k might not be '\0'-terminated 336 * Note: v is not '\0'-terminated 337 * With lighttpd HTTP/1.1 parser, v ends with whitespace 338 * (one of '\r' '\n' ' ' '\t') 339 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen 340 * (care must be taken to avoid libc funcs which expect z-strings) 341 */ 342 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/ 343 344 switch (id) { 345 /*case HTTP_HEADER_OTHER:*/ 346 default: 347 break; 348 case HTTP_HEADER_HOST: 349 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) { 350 if (vlen >= 1024) { /*(expecting < 256)*/ 351 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400"); 352 } 353 /*(http_request_header_append() plus sets r->http_host)*/ 354 http_request_header_set_Host(r, v, vlen); 355 return 0; 356 } 357 else if (NULL != r->http_host 358 && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) { 359 /* ignore all Host: headers if match authority in request line */ 360 /* (expect Host to match case in :authority of HTTP/2 request) */ 361 return 0; /* ignore header */ 362 } 363 else { 364 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 365 } 366 break; 367 case HTTP_HEADER_CONNECTION: 368 /* "Connection: close" is common case if header is present */ 369 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close"))) 370 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) { 371 r->keep_alive = 0; 372 break; 373 } 374 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){ 375 r->keep_alive = 1; 376 break; 377 } 378 break; 379 case HTTP_HEADER_CONTENT_TYPE: 380 if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) { 381 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 382 } 383 break; 384 case HTTP_HEADER_IF_NONE_MATCH: 385 /* if dup, only the first one will survive */ 386 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) { 387 return 0; /* ignore header */ 388 } 389 break; 390 case HTTP_HEADER_CONTENT_LENGTH: 391 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 392 /*(trailing whitespace was removed from vlen)*/ 393 /*(not using strtoll() since v might not be z-string)*/ 394 const char *err; 395 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err); 396 if (err == v+vlen) { 397 /* (set only if not set to -1 by Transfer-Encoding: chunked) */ 398 if (0 == r->reqbody_length) r->reqbody_length = clen; 399 } 400 else { 401 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400"); 402 } 403 } 404 else { 405 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400"); 406 } 407 break; 408 case HTTP_HEADER_HTTP2_SETTINGS: 409 if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) { 410 return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400"); 411 } 412 break; 413 case HTTP_HEADER_IF_MODIFIED_SINCE: 414 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) { 415 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 416 } 417 break; 418 case HTTP_HEADER_TRANSFER_ENCODING: 419 if (HTTP_VERSION_1_1 != r->http_version) { 420 return http_request_header_line_invalid(r, 400, 421 HTTP_VERSION_1_0 == r->http_version 422 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400" 423 : "HTTP/2 with Transfer-Encoding is invalid -> 400"); 424 } 425 426 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) { 427 /* Transfer-Encoding might contain additional encodings, 428 * which are not currently supported by lighttpd */ 429 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */ 430 } 431 r->reqbody_length = -1; 432 433 /* Transfer-Encoding is a hop-by-hop header, 434 * which must not be blindly forwarded to backends */ 435 return 0; /* skip header */ 436 } 437 438 http_header_request_append(r, id, k, klen, v, vlen); 439 return 0; 440 } 441 442 __attribute_cold__ 443 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) { 444 const char * proto = memchr(ptr, ' ', len); 445 if (NULL == proto) 446 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 447 proto = memchr(proto+1, ' ', len - (proto+1 - ptr)); 448 if (NULL == proto) 449 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 450 ++proto; 451 452 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') { 453 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) { 454 /* length already checked before calling this routine */ 455 /* (len != (size_t)(proto - ptr + 8)) */ 456 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/ 457 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 458 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0; 459 } 460 else 461 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505"); 462 } 463 else 464 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400"); 465 466 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */ 467 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version); 468 469 return 0; 470 } 471 472 __attribute_cold__ 473 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) { 474 const char *nuri; 475 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7) 476 && NULL != (nuri = memchr(uri + 7, '/', len-7))) 477 || 478 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8) 479 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) { 480 const char * const host = uri + (uri[4] == ':' ? 7 : 8); 481 const size_t hostlen = nuri - host; 482 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/ 483 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400"); 484 return NULL; 485 } 486 /* Insert as "Host" header */ 487 http_request_header_set_Host(r, host, hostlen); 488 return nuri; 489 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/ 490 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0]))) 491 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) { 492 /* (permitted) */ 493 return uri; 494 } else { 495 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400"); 496 return NULL; 497 } 498 } 499 500 501 __attribute_cold__ 502 __attribute_noinline__ 503 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict); 504 505 506 int 507 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts) 508 { 509 /* :method is required to indicate method 510 * CONNECT method must have :method and :authority 511 * All other methods must have at least :method :scheme :path */ 512 513 if (HTTP_METHOD_UNSET == r->http_method) 514 return http_request_header_line_invalid(r, 400, 515 "missing pseudo-header method -> 400"); 516 517 if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) { 518 if (!scheme) 519 return http_request_header_line_invalid(r, 400, 520 "missing pseudo-header scheme -> 400"); 521 522 if (buffer_is_blank(&r->target)) 523 return http_request_header_line_invalid(r, 400, 524 "missing pseudo-header path -> 400"); 525 526 const char * const uri = r->target.ptr; 527 if (*uri != '/') { /* (common case: (*uri == '/')) */ 528 if (uri[0] != '*' || uri[1] != '\0' 529 || HTTP_METHOD_OPTIONS != r->http_method) 530 return http_request_header_line_invalid(r, 400, 531 "invalid pseudo-header path -> 400"); 532 } 533 } 534 else { /* HTTP_METHOD_CONNECT */ 535 if (NULL == r->http_host) 536 return http_request_header_line_invalid(r, 400, 537 "missing pseudo-header authority -> 400"); 538 if (!buffer_is_blank(&r->target) || scheme) 539 return http_request_header_line_invalid(r, 400, 540 "invalid pseudo-header with CONNECT -> 400"); 541 /* note: this copy occurs prior to http_request_host_policy() 542 * so any consumer handling CONNECT should normalize r->target 543 * as appropriate */ 544 buffer_copy_buffer(&r->target, r->http_host); 545 } 546 buffer_copy_buffer(&r->target_orig, &r->target); 547 548 /* r->http_host, if set, is checked with http_request_host_policy() 549 * in http_request_parse() */ 550 551 /* copied and modified from end of http_request_parse_reqline() */ 552 553 /* check uri for invalid characters */ 554 const unsigned int http_header_strict = 555 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 556 const uint32_t ulen = buffer_clen(&r->target); 557 const uint8_t * const uri = (uint8_t *)r->target.ptr; 558 if (http_header_strict) { 559 if (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 560 return 0; /* URI will be checked in http_request_parse_target() */ 561 for (uint32_t i = 0; i < ulen; ++i) { 562 if (!request_uri_is_valid_char(uri[i])) 563 return http_request_header_char_invalid(r, uri[i], 564 "invalid character in URI -> 400"); 565 } 566 } 567 else { 568 if (NULL != memchr(uri, '\0', ulen)) 569 return http_request_header_char_invalid(r, '\0', 570 "invalid character in header -> 400"); 571 } 572 573 return 0; 574 } 575 576 577 int 578 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx) 579 { 580 /* Note: k and v might not be '\0' terminated strings; 581 * care must be taken to avoid libc funcs which expect z-strings */ 582 const char * const restrict k = hpctx->k; 583 const char * const restrict v = hpctx->v; 584 const uint32_t klen = hpctx->klen; 585 const uint32_t vlen = hpctx->vlen; 586 587 if (0 == klen) 588 return http_request_header_line_invalid(r, 400, 589 "invalid header key -> 400"); 590 591 if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) { 592 /*(configurable with server.max-request-field-size; default 8k)*/ 593 #if 1 /* emit to error log for people sending large headers */ 594 log_error(r->conf.errh, __FILE__, __LINE__, 595 "oversized request header -> 431"); 596 return 431; /* Request Header Fields Too Large */ 597 #else 598 /* 431 Request Header Fields Too Large */ 599 return http_request_header_line_invalid(r, 431, 600 "oversized request header -> 431"); 601 #endif 602 } 603 604 if (!hpctx->trailers) { 605 if (*k == ':') { 606 /* HTTP/2 request pseudo-header fields */ 607 if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/ 608 return http_request_header_line_invalid(r, 400, 609 "invalid pseudo-header -> 400"); 610 if (0 == vlen) 611 return http_request_header_line_invalid(r, 400, 612 "invalid header value -> 400"); 613 614 /* (note: relies on implementation details using ls-hpack in h2.c) 615 * (hpctx->id mapped from lsxpack_header_t hpack_index, which only 616 * matches key, not also value, if lsxpack_header_t flags does not 617 * have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET 618 * below indicates any method, not only "GET") */ 619 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 620 switch (klen-1) { 621 case 4: 622 if (0 == memcmp(k+1, "path", 4)) 623 hpctx->id = HTTP_HEADER_H2_PATH; 624 break; 625 case 6: 626 if (0 == memcmp(k+1, "method", 6)) 627 hpctx->id = HTTP_HEADER_H2_METHOD_GET; 628 else if (0 == memcmp(k+1, "scheme", 6)) 629 hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP; 630 break; 631 case 9: 632 if (0 == memcmp(k+1, "authority", 9)) 633 hpctx->id = HTTP_HEADER_H2_AUTHORITY; 634 break; 635 default: 636 break; 637 } 638 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN) 639 return http_request_header_line_invalid(r, 400, 640 "invalid pseudo-header -> 400"); 641 } 642 643 switch (hpctx->id) { 644 case HTTP_HEADER_H2_AUTHORITY: 645 if (__builtin_expect( (r->http_host != NULL), 0)) 646 break; 647 if (vlen >= 1024) /*(expecting < 256)*/ 648 return http_request_header_line_invalid(r, 400, 649 "invalid pseudo-header authority too long -> 400"); 650 /* insert as "Host" header */ 651 http_request_header_set_Host(r, v, vlen); 652 return 0; 653 case HTTP_HEADER_H2_METHOD_GET: /*(any method, not only "GET")*/ 654 case HTTP_HEADER_H2_METHOD_POST: 655 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0)) 656 break; 657 r->http_method = get_http_method_key(v, vlen); 658 if (HTTP_METHOD_UNSET >= r->http_method) 659 return http_request_header_line_invalid(r, 501, 660 "unknown http-method -> 501"); 661 return 0; 662 case HTTP_HEADER_H2_PATH: /*(any path, not only "/")*/ 663 case HTTP_HEADER_H2_PATH_INDEX_HTML: 664 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0)) 665 break; 666 buffer_copy_string_len(&r->target, v, vlen); 667 return 0; 668 case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/ 669 case HTTP_HEADER_H2_SCHEME_HTTPS: 670 if (__builtin_expect( (hpctx->scheme), 0)) 671 break; 672 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/ 673 return 0; 674 #if 0 675 switch (vlen) {/*(validated, but then ignored)*/ 676 case 5: /* "https" */ 677 if (v[4]!='s') break; 678 __attribute_fallthrough__ 679 case 4: /* "http" */ 680 if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') { 681 hpctx->scheme = 1; 682 return 0; 683 } 684 break; 685 default: 686 break; 687 } 688 return http_request_header_line_invalid(r, 400, 689 "unknown pseudo-header scheme -> 400"); 690 #endif 691 default: 692 return http_request_header_line_invalid(r, 400, 693 "invalid pseudo-header -> 400"); 694 } 695 return http_request_header_line_invalid(r, 400, 696 "repeated pseudo-header -> 400"); 697 } 698 else { /*(non-pseudo headers)*/ 699 if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/ 700 hpctx->pseudo = 0; 701 int status = 702 http_request_validate_pseudohdrs(r, hpctx->scheme, 703 hpctx->http_parseopts); 704 if (0 != status) return status; 705 } 706 if (0 == vlen) 707 return 0; 708 709 const unsigned int http_header_strict = 710 (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 711 712 if (http_header_strict) { 713 for (uint32_t j = 0; j < vlen; ++j) { 714 if ((((uint8_t *)v)[j] < 32 && v[j] != '\t') || v[j]==127) 715 return http_request_header_char_invalid(r, v[j], 716 "invalid character in header -> 400"); 717 } 718 } 719 else { 720 if (NULL != memchr(v, '\0', vlen)) 721 return http_request_header_char_invalid(r, '\0', 722 "invalid character in header -> 400"); 723 } 724 725 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 726 uint32_t j = 0; 727 while (j < klen && (light_islower(k[j]) || k[j] == '-')) 728 ++j; 729 730 if (__builtin_expect( (j != klen), 0)) { 731 if (light_isupper(k[j])) 732 return 400; 733 if (0 != http_request_parse_header_other(r, k+j, klen-j, 734 http_header_strict)) 735 return 400; 736 } 737 738 hpctx->id = http_header_hkey_get_lc(k, klen); 739 } 740 741 const enum http_header_e id = (enum http_header_e)hpctx->id; 742 743 if (__builtin_expect( (id == HTTP_HEADER_TE), 0) 744 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers"))) 745 return http_request_header_line_invalid(r, 400, 746 "invalid TE header value with HTTP/2 -> 400"); 747 748 return http_request_parse_single_header(r, id, k, klen, v, vlen); 749 } 750 } 751 else { /*(trailers)*/ 752 if (*k == ':') 753 return http_request_header_line_invalid(r, 400, 754 "invalid pseudo-header in trailers -> 400"); 755 /* ignore trailers (after required HPACK decoding) if streaming 756 * request body to backend since headers have already been sent 757 * to backend via Common Gateway Interface (CGI) (CGI, FastCGI, 758 * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently 759 * support using HTTP/2 to connect to backends) */ 760 #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/ 761 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST) 762 return 0; 763 #endif 764 /* Note: do not unconditionally merge into headers since if 765 * headers had already been sent to backend, then mod_accesslog 766 * logging of request headers might be inaccurate. 767 * Many simple backends do not support HTTP/1.1 requests sending 768 * Transfer-Encoding: chunked, and even those that do might not 769 * handle trailers. Some backends do not even support HTTP/1.1. 770 * For all these reasons, ignore trailers if streaming request 771 * body to backend. Revisit in future if adding support for 772 * connecting to backends using HTTP/2 (with explicit config 773 * option to force connecting to backends using HTTP/2) */ 774 775 /* XXX: TODO: request trailers not handled if streaming reqbody 776 * XXX: must ensure that trailers are not disallowed field-names 777 */ 778 779 #if 0 780 if (0 == vlen) 781 return 0; 782 #endif 783 784 return 0; 785 } 786 } 787 788 789 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 790 size_t len = hoff[2]; 791 792 /* parse the first line of the request 793 * <method> <uri> <protocol>\r\n 794 * */ 795 if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */ 796 return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400"); 797 if (ptr[len-2] == '\r') 798 len-=2; 799 else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/ 800 len-=1; 801 else 802 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 803 804 /* 805 * RFC7230: 806 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT 807 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 808 */ 809 810 /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */ 811 union proto_un { 812 char c[8]; 813 uint64_t u; 814 }; 815 static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}}; 816 static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}}; 817 const char *p = ptr + len - 8; 818 union proto_un proto8; 819 proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3]; 820 proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7]; 821 if (p[-1] == ' ' && http_1_1.u == proto8.u) { 822 r->http_version = HTTP_VERSION_1_1; 823 r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */ 824 } 825 else if (p[-1] == ' ' && http_1_0.u == proto8.u) { 826 r->http_version = HTTP_VERSION_1_0; 827 r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */ 828 } 829 else { 830 int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts); 831 if (0 != status) return status; 832 /*(space char must exist if http_request_parse_proto_loose() succeeds)*/ 833 for (p = ptr + len - 9; p[-1] != ' '; --p) ; 834 } 835 836 /* method is expected to be a short string in the general case */ 837 size_t i = 0; 838 while (ptr[i] != ' ') ++i; 839 #if 0 /*(space must exist if protocol was parsed successfully)*/ 840 while (i < len && ptr[i] != ' ') ++i; 841 if (ptr[i] != ' ') 842 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 843 #endif 844 845 r->http_method = get_http_method_key(ptr, i); 846 if (HTTP_METHOD_UNSET >= r->http_method) 847 return http_request_header_line_invalid(r, 501, "unknown http-method -> 501"); 848 849 const char *uri = ptr + i + 1; 850 851 if (uri == p) 852 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 853 len = (size_t)(p - uri - 1); 854 855 if (*uri != '/') { /* (common case: (*uri == '/')) */ 856 uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts); 857 if (NULL == uri) return 400; 858 len = (size_t)(p - uri - 1); 859 } 860 861 if (0 == len) 862 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 863 864 /* check uri for invalid characters */ 865 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) { /* http_header_strict */ 866 if ((http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) { 867 /* URI will be checked in http_request_parse_target() */ 868 } 869 else { 870 for (i = 0; i < len; ++i) { 871 if (!request_uri_is_valid_char(uri[i])) 872 return http_request_header_char_invalid(r, uri[i], "invalid character in URI -> 400"); 873 } 874 } 875 } 876 else { 877 /* check entire set of request headers for '\0' */ 878 if (NULL != memchr(ptr, '\0', hoff[hoff[0]])) 879 return http_request_header_char_invalid(r, '\0', "invalid character in header -> 400"); 880 } 881 882 buffer_copy_string_len(&r->target, uri, len); 883 buffer_copy_string_len(&r->target_orig, uri, len); 884 return 0; 885 } 886 887 int http_request_parse_target(request_st * const r, int scheme_port) { 888 /* URI is parsed into components at start of request and may 889 * also be re-parsed upon HANDLER_COMEBACK during the request 890 * r->target is expected to be a "/url-part?query-part" 891 * (and *not* a fully-qualified URI starting https://...) 892 * r->uri.authority is expected to be parsed elsewhere into r->http_host 893 */ 894 895 /** 896 * prepare strings 897 * 898 * - uri.path 899 * - uri.query 900 * 901 */ 902 903 /** 904 * Name according to RFC 2396 905 * 906 * - scheme 907 * - authority 908 * - path 909 * - query 910 * 911 * (scheme)://(authority)(path)?(query)#fragment 912 * 913 */ 914 915 /* take initial scheme value from connection-level state 916 * (request r->uri.scheme can be overwritten for later, 917 * for example by mod_extforward or mod_magnet) */ 918 buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4); 919 920 buffer * const target = &r->target; 921 if (r->http_method == HTTP_METHOD_CONNECT 922 || (r->http_method == HTTP_METHOD_OPTIONS 923 && target->ptr[0] == '*' 924 && target->ptr[1] == '\0')) { 925 /* CONNECT ... (or) OPTIONS * ... */ 926 buffer_copy_buffer(&r->uri.path, target); 927 buffer_clear(&r->uri.query); 928 return 0; 929 } 930 931 char *qstr; 932 if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { 933 /*uint32_t len = buffer_clen(target);*/ 934 int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts); 935 if (-2 == qs) 936 return http_request_header_line_invalid(r, 400, 937 "invalid character in URI -> 400"); /* Bad Request */ 938 qstr = (-1 == qs) ? NULL : target->ptr+qs; 939 #if 0 /* future: might enable here, or below for all requests */ 940 /* (Note: total header size not recalculated on HANDLER_COMEBACK 941 * even if other request headers changed during processing) 942 * (If (0 != r->loops_per_request), then the generated 943 * request is too large. Should a different error be returned?) */ 944 r->rqst_header_len -= len; 945 len = buffer_clen(target); 946 r->rqst_header_len += len; 947 if (len > MAX_HTTP_REQUEST_URI) { 948 return 414; /* 414 URI Too Long */ 949 } 950 if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) { 951 log_error(r->conf.errh, __FILE__, __LINE__, 952 "request header fields too large: %u -> 431", 953 r->rqst_header_len); 954 return 431; /* Request Header Fields Too Large */ 955 } 956 #endif 957 } 958 else { 959 size_t rlen = buffer_clen(target); 960 qstr = memchr(target->ptr, '#', rlen);/* discard fragment */ 961 if (qstr) { 962 rlen = (size_t)(qstr - target->ptr); 963 buffer_truncate(target, rlen); 964 } 965 qstr = memchr(target->ptr, '?', rlen); 966 } 967 968 /** extract query string from target */ 969 const char * const pstr = target->ptr; 970 const uint32_t rlen = buffer_clen(target); 971 uint32_t plen; 972 if (NULL != qstr) { 973 plen = (uint32_t)(qstr - pstr); 974 buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1); 975 } 976 else { 977 plen = rlen; 978 buffer_clear(&r->uri.query); 979 } 980 buffer_copy_string_len(&r->uri.path, pstr, plen); 981 982 /* decode url to path 983 * 984 * - decode url-encodings (e.g. %20 -> ' ') 985 * - remove path-modifiers (e.g. /../) 986 */ 987 988 buffer_urldecode_path(&r->uri.path); 989 buffer_path_simplify(&r->uri.path); 990 if (r->uri.path.ptr[0] != '/') 991 return http_request_header_line_invalid(r, 400, 992 "uri-path does not begin with '/' -> 400"); /* Bad Request */ 993 994 return 0; 995 } 996 997 __attribute_cold__ 998 __attribute_noinline__ 999 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) { 1000 for (int i = 0; i < klen; ++i) { 1001 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/ 1002 /** 1003 * 1*<any CHAR except CTLs or separators> 1004 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127) 1005 * 1006 */ 1007 switch(k[i]) { 1008 case ' ': 1009 case '\t': 1010 return http_request_header_line_invalid(r, 400, "WS character in key -> 400"); 1011 case '(': 1012 case ')': 1013 case '<': 1014 case '>': 1015 case '@': 1016 case ',': 1017 case ';': 1018 case '\\': 1019 case '\"': 1020 case '/': 1021 case '[': 1022 case ']': 1023 case '?': 1024 case '=': 1025 case '{': 1026 case '}': 1027 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1028 default: 1029 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0') 1030 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1031 break; /* ok */ 1032 } 1033 } 1034 return 0; 1035 } 1036 1037 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 1038 const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1039 1040 #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/ 1041 int i = hoff[2]; 1042 1043 if (ptr[i] == ' ' || ptr[i] == '\t') { 1044 return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400"); 1045 } 1046 #endif 1047 1048 for (int i = 2; i < hoff[0]; ++i) { 1049 const char *k = ptr + hoff[i]; 1050 /* one past last line hoff[hoff[0]] is to final "\r\n" */ 1051 char *end = ptr + hoff[i+1]; 1052 1053 const char *colon = memchr(k, ':', end - k); 1054 if (NULL == colon) 1055 return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400"); 1056 1057 const char *v = colon + 1; 1058 1059 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1060 * 3.2.4. Field Parsing 1061 * [...] 1062 * No whitespace is allowed between the header field-name and colon. In 1063 * the past, differences in the handling of such whitespace have led to 1064 * security vulnerabilities in request routing and response handling. A 1065 * server MUST reject any received request message that contains 1066 * whitespace between a header field-name and colon with a response code 1067 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a 1068 * response message before forwarding the message downstream. 1069 */ 1070 /* (line k[-1] is always preceded by a '\n', 1071 * including first header after request-line, 1072 * so no need to check colon != k) */ 1073 if (colon[-1] == ' ' || colon[-1] == '\t') { 1074 if (http_header_strict) { 1075 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400"); 1076 } 1077 else { 1078 /* remove trailing whitespace from key(if !http_header_strict)*/ 1079 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t'); 1080 } 1081 } 1082 1083 const int klen = (int)(colon - k); 1084 if (0 == klen) 1085 return http_request_header_line_invalid(r, 400, "invalid header key -> 400"); 1086 const enum http_header_e id = http_header_hkey_get(k, klen); 1087 1088 if (id == HTTP_HEADER_OTHER) { 1089 for (int j = 0; j < klen; ++j) { 1090 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/ 1091 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict)) 1092 return 400; 1093 break; 1094 } 1095 } 1096 1097 /* remove leading whitespace from value */ 1098 while (*v == ' ' || *v == '\t') ++v; 1099 1100 for (; i+1 <= hoff[0]; ++i) { 1101 end = ptr + hoff[i+1]; 1102 if (end[0] != ' ' && end[0] != '\t') break; 1103 1104 /* line folding */ 1105 #ifdef __COVERITY__ 1106 force_assert(end - k >= 2); 1107 #endif 1108 if (end[-2] == '\r') 1109 end[-2] = ' '; 1110 else if (http_header_strict) 1111 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1112 end[-1] = ' '; 1113 } 1114 #ifdef __COVERITY__ 1115 /*(buf holding k has non-zero request-line, so end[-2] valid)*/ 1116 force_assert(end >= k + 2); 1117 #endif 1118 if (end[-2] == '\r') 1119 --end; 1120 else if (http_header_strict) 1121 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1122 /* remove trailing whitespace from value (+ remove '\r\n') */ 1123 /* (line k[-1] is always preceded by a '\n', 1124 * including first header after request-line, 1125 * so no need to check (end != k)) */ 1126 do { --end; } while (end[-1] == ' ' || end[-1] == '\t'); 1127 1128 const int vlen = (int)(end - v); 1129 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */ 1130 if (vlen <= 0) continue; /* ignore header */ 1131 1132 if (http_header_strict) { 1133 for (int j = 0; j < vlen; ++j) { 1134 if ((((unsigned char *)v)[j] < 32 && v[j] != '\t') || v[j]==127) 1135 return http_request_header_char_invalid(r, v[j], "invalid character in header -> 400"); 1136 } 1137 } /* else URI already checked in http_request_parse_reqline() for any '\0' */ 1138 1139 int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen); 1140 if (0 != status) return status; 1141 } 1142 1143 return 0; 1144 } 1145 1146 1147 static int 1148 http_request_parse (request_st * const restrict r, const int scheme_port) 1149 { 1150 int status = http_request_parse_target(r, scheme_port); 1151 if (0 != status) return status; 1152 1153 /* post-processing */ 1154 const unsigned int http_parseopts = r->conf.http_parseopts; 1155 1156 /* check hostname field if it is set */ 1157 /*(r->http_host might not be set until after parsing request headers)*/ 1158 if (__builtin_expect( (r->http_host != NULL), 1)) { 1159 if (0 != http_request_host_policy(r->http_host, 1160 http_parseopts, scheme_port)) 1161 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400"); 1162 buffer_copy_buffer(&r->uri.authority, r->http_host); 1163 } 1164 else { 1165 buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN("")); 1166 if (r->http_version >= HTTP_VERSION_1_1) 1167 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400"); 1168 } 1169 1170 if (HTTP_VERSION_1_1 != r->http_version 1171 && (r->rqst_htags 1172 & (light_bshift(HTTP_HEADER_UPGRADE) 1173 |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) { 1174 return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400"); 1175 } 1176 1177 if (0 == r->reqbody_length) { 1178 /* POST requires Content-Length (or Transfer-Encoding) 1179 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1180 if (HTTP_METHOD_POST == r->http_method 1181 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1182 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411"); 1183 } 1184 } 1185 else { 1186 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1187 if (-1 == r->reqbody_length 1188 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1189 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1190 * 3.3.3. Message Body Length 1191 * [...] 1192 * If a message is received with both a Transfer-Encoding and a 1193 * Content-Length header field, the Transfer-Encoding overrides the 1194 * Content-Length. Such a message might indicate an attempt to 1195 * perform request smuggling (Section 9.5) or response splitting 1196 * (Section 9.4) and ought to be handled as an error. A sender MUST 1197 * remove the received Content-Length field prior to forwarding such 1198 * a message downstream. 1199 */ 1200 const unsigned int http_header_strict = 1201 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1202 if (http_header_strict) { 1203 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400"); 1204 } 1205 else { 1206 /* ignore Content-Length */ 1207 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length")); 1208 } 1209 } 1210 if (http_method_get_or_head(r->http_method) 1211 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) { 1212 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400"); 1213 } 1214 } 1215 1216 return 0; 1217 } 1218 1219 1220 static int 1221 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1222 { 1223 /* 1224 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" 1225 * Header : "^([-a-zA-Z]+): (.+)$" 1226 * End : "^$" 1227 */ 1228 1229 int status; 1230 const unsigned int http_parseopts = r->conf.http_parseopts; 1231 1232 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts); 1233 if (0 != status) return status; 1234 1235 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts); 1236 if (0 != status) return status; 1237 1238 return http_request_parse(r, scheme_port); 1239 } 1240 1241 1242 static void 1243 http_request_headers_fin (request_st * const restrict r) 1244 { 1245 if (0 == r->http_status) { 1246 #if 0 1247 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET) 1248 | (1 << COMP_HTTP_SCHEME) 1249 | (1 << COMP_HTTP_HOST) 1250 | (1 << COMP_HTTP_REMOTE_IP) 1251 | (1 << COMP_HTTP_REQUEST_METHOD) 1252 | (1 << COMP_HTTP_URL) 1253 | (1 << COMP_HTTP_QUERY_STRING) 1254 | (1 << COMP_HTTP_REQUEST_HEADER); 1255 #else 1256 /* all config conditions are valid after parsing header 1257 * (set all bits; remove dependency on plugin_config.h) */ 1258 r->conditional_is_valid = ~0u; 1259 #endif 1260 } 1261 else { 1262 r->keep_alive = 0; 1263 r->reqbody_length = 0; 1264 } 1265 } 1266 1267 1268 void 1269 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1270 { 1271 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port); 1272 1273 http_request_headers_fin(r); 1274 1275 if (__builtin_expect( (0 != r->http_status), 0)) { 1276 if (r->conf.log_request_header_on_error) { 1277 /*(http_request_parse_headers() modifies hdrs only to 1278 * undo line-wrapping in-place using spaces)*/ 1279 log_error_multiline(r->conf.errh, __FILE__, __LINE__, 1280 hdrs, r->rqst_header_len, "rqst: "); 1281 } 1282 } 1283 } 1284 1285 1286 void 1287 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port) 1288 { 1289 if (0 == r->http_status) 1290 r->http_status = http_request_parse(r, scheme_port); 1291 1292 if (0 == r->http_status) { 1293 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION)) 1294 r->http_status = http_request_header_line_invalid(r, 400, 1295 "invalid Connection header with HTTP/2 -> 400"); 1296 } 1297 1298 http_request_headers_fin(r); 1299 1300 /* limited; headers not collected into a single buf for HTTP/2 */ 1301 if (__builtin_expect( (0 != r->http_status), 0)) { 1302 if (r->conf.log_request_header_on_error) { 1303 log_error(r->conf.errh, __FILE__, __LINE__, 1304 "request-header:\n:authority: %s\n:method: %s\n:path: %s", 1305 r->http_host ? r->http_host->ptr : "", 1306 http_method_buf(r->http_method)->ptr, 1307 !buffer_is_blank(&r->target) ? r->target.ptr : ""); 1308 } 1309 } 1310 1311 /* ignore Upgrade if using HTTP/2 */ 1312 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)) 1313 http_header_request_unset(r, HTTP_HEADER_UPGRADE, 1314 CONST_STR_LEN("upgrade")); 1315 /* XXX: should filter out other hop-by-hop connection headers, too */ 1316 } 1317