1 /* 2 * request - HTTP request processing 3 * 4 * Fully-rewritten from original 5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved 6 * License: BSD 3-clause (same as lighttpd) 7 */ 8 #include "first.h" 9 10 #include "request.h" 11 #include "burl.h" 12 #include "http_header.h" 13 #include "http_kv.h" 14 #include "log.h" 15 #include "sock_addr.h" 16 17 #include <limits.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 __attribute_noinline__ 23 __attribute_nonnull__() 24 __attribute_pure__ 25 static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) { 26 for (uint_fast32_t i = 0; i < len; ++i) { 27 if (__builtin_expect( (s[i] <= 32), 0)) return (const char *)s+i; 28 if (__builtin_expect( (s[i] == 127), 0)) return (const char *)s+i; 29 if (__builtin_expect( (s[i] == 255), 0)) return (const char *)s+i; 30 } 31 return NULL; 32 } 33 34 __attribute_nonnull__() 35 __attribute_pure__ 36 static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) { 37 for (uint_fast32_t i = 0; i < len; ++i) { 38 if (__builtin_expect( (((const uint8_t *)s)[i]<32), 0) && s[i] != '\t') 39 return s+i; 40 if (__builtin_expect( (s[i] == 127), 0)) 41 return s+i; 42 } 43 return NULL; 44 } 45 46 __attribute_nonnull__() 47 __attribute_pure__ 48 static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) { 49 for (uint_fast32_t i = 0; i < len; ++i) { 50 if (__builtin_expect( (s[i] == '\0'), 0)) return s+i; 51 if (__builtin_expect( (s[i] == '\n'), 0)) return s+i; 52 } 53 return NULL; 54 } 55 56 static int request_check_hostname(buffer * const host) { 57 /* 58 * hostport = host [ ":" port ] 59 * host = hostname | IPv4address | IPv6address 60 * hostname = *( domainlabel "." ) toplabel [ "." ] 61 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 62 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 63 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 64 * IPv6address = "[" ... "]" 65 * port = *digit 66 */ 67 68 const char *h = host->ptr; 69 70 if (*h != '[') { 71 uint32_t len = buffer_clen(host); 72 const char * const colon = memchr(h, ':', len); 73 uint32_t hlen = colon ? (uint32_t)(colon - h) : len; 74 75 /* if hostname ends in ".", strip it */ 76 if (__builtin_expect( (0 == hlen), 0)) return -1; 77 if (__builtin_expect( (h[hlen-1] == '.'), 0)) { 78 /* shift port info one left */ 79 if (--hlen == 0) return -1; 80 --len; 81 if (NULL != colon) 82 memmove(host->ptr+hlen, colon, len - hlen); 83 buffer_truncate(host, len); 84 } 85 86 int label_len = 0; 87 int allnumeric = 1; 88 int numeric = 1; 89 int level = 0; 90 for (uint32_t i = 0; i < hlen; ++i) { 91 const int ch = h[i]; 92 ++label_len; 93 if (light_isdigit(ch)) 94 continue; 95 else if ((light_isalpha(ch) || (ch == '-' && i != 0))) 96 numeric = 0; 97 else if (ch == '.' && 1 != label_len && '-' != h[i+1]) { 98 allnumeric &= numeric; 99 numeric = 1; 100 label_len = 0; 101 ++level; 102 } 103 else 104 return -1; 105 } 106 /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */ 107 if (0 == label_len || (numeric && (level != 3 || !allnumeric))) 108 return -1; 109 110 h += hlen; 111 } 112 else { /* IPv6 address */ 113 /* check the address inside [...]; note: not fully validating */ 114 /* (note: not allowing scoped literals, e.g. %eth0 suffix) */ 115 ++h; /* step past '[' */ 116 int cnt = 0; 117 while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h; 118 /*(invalid char, too many ':', missing ']', or empty "[]")*/ 119 if (*h != ']' || h - host->ptr == 1) return -1; 120 ++h; /* step past ']' */ 121 } 122 123 /* check numerical port, if present */ 124 if (*h == ':') { 125 if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/ 126 buffer_truncate(host, h - host->ptr); 127 do { ++h; } while (light_isdigit(*h)); 128 } 129 130 return (*h == '\0') ? 0 : -1; 131 } 132 133 int http_request_host_normalize(buffer * const b, const int scheme_port) { 134 /* 135 * check for and canonicalize numeric IP address and portnum (optional) 136 * (IP address may be followed by ":portnum" (optional)) 137 * - IPv6: "[...]" 138 * - IPv4: "x.x.x.x" 139 * - IPv4: 12345678 (32-bit decimal number) 140 * - IPv4: 012345678 (32-bit octal number) 141 * - IPv4: 0x12345678 (32-bit hex number) 142 * 143 * allow any chars (except ':' and '\0' and stray '[' or ']') 144 * (other code may check chars more strictly or more pedantically) 145 * ':' delimits (optional) port at end of string 146 * "[]" wraps IPv6 address literal 147 * '\0' should have been rejected earlier were it present 148 * 149 * any chars includes, but is not limited to: 150 * - allow '-' any where, even at beginning of word 151 * (security caution: might be confused for cmd flag if passed to shell) 152 * - allow all-digit TLDs 153 * (might be mistaken for IPv4 addr by inet_aton() 154 * unless non-digits appear in subdomain) 155 */ 156 157 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 158 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. 159 * Not using inet_pton() (when available) on IPv4 for similar reasons. */ 160 161 const char * const p = b->ptr; 162 const size_t blen = buffer_clen(b); 163 long port = 0; 164 165 if (*p != '[') { 166 char * const colon = (char *)memchr(p, ':', blen); 167 if (colon) { 168 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ 169 if (colon[1] != '\0') { 170 char *e; 171 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ 172 if (0 < port && port <= USHRT_MAX && *e == '\0') { 173 /* valid port */ 174 } else { 175 return -1; 176 } 177 } /*(else ignore stray colon at string end)*/ 178 buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/ 179 } 180 181 if (light_isdigit(*p)) do { 182 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ 183 /* (check one-element cache of normalized IPv4 address string) */ 184 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr; 185 size_t n = colon ? (size_t)(colon - p) : blen; 186 sock_addr addr; 187 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break; 188 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) { 189 sock_addr_inet_ntop_copy_buffer(b, &addr); 190 n = buffer_clen(b); 191 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n)); 192 } 193 } while (0); 194 } else do { /* IPv6 addr */ 195 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 196 197 /* (check one-element cache of normalized IPv4 address string) */ 198 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr; 199 sock_addr addr; 200 char *bracket = b->ptr+blen-1; 201 char *percent = strchr(b->ptr+1, '%'); 202 size_t len; 203 int rc; 204 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/ 205 if (blen <= 2) return -1; /*(invalid "[]")*/ 206 if (*bracket != ']') { 207 bracket = (char *)memchr(b->ptr+1, ']', blen-1); 208 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ 209 return -1; 210 } 211 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ 212 char *e; 213 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ 214 if (0 < port && port <= USHRT_MAX && *e == '\0') { 215 /* valid port */ 216 } else { 217 return -1; 218 } 219 } 220 } 221 222 len = (size_t)((percent ? percent : bracket) - (b->ptr+1)); 223 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) { 224 /* truncate after ']' and re-add normalized port, if needed */ 225 buffer_truncate(b, (size_t)(bracket - b->ptr + 1)); 226 break; 227 } 228 229 *bracket = '\0';/*(terminate IPv6 string)*/ 230 if (percent) *percent = '\0'; /*(remove %interface from address)*/ 231 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0); 232 if (percent) *percent = '%'; /*(restore %interface)*/ 233 *bracket = ']'; /*(restore bracket)*/ 234 if (1 != rc) return -1; 235 236 sock_addr_inet_ntop(&addr, buf, sizeof(buf)); 237 len = strlen(buf); 238 if (percent) { 239 if (percent > bracket) return -1; 240 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1; 241 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len)); 242 memcpy(buf+len, percent, (size_t)(bracket - percent)); 243 len += (size_t)(bracket - percent); 244 } 245 buffer_truncate(b, 1); /* truncate after '[' */ 246 buffer_append_str2(b, buf, len, CONST_STR_LEN("]")); 247 248 #else 249 250 return -1; 251 252 #endif 253 } while (0); 254 255 if (0 != port && port != scheme_port) { 256 buffer_append_string_len(b, CONST_STR_LEN(":")); 257 buffer_append_int(b, (int)port); 258 } 259 260 return 0; 261 } 262 263 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) { 264 /* caller should lowercase, as is done in http_request_header_set_Host(), 265 * for consistency in case the value is used prior to calling policy func */ 266 /*buffer_to_lower(b);*/ 267 return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT) 268 ? 0 != request_check_hostname(b) 269 : NULL != http_request_check_line_minimal(BUF_PTR_LEN(b))) 270 || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) 271 && 0 != http_request_host_normalize(b, scheme_port))); 272 } 273 274 __attribute_cold__ 275 __attribute_noinline__ 276 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) { 277 if (r->conf.log_request_header_on_error) { 278 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg); 279 } 280 return status; 281 } 282 283 __attribute_cold__ 284 __attribute_noinline__ 285 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) { 286 if (r->conf.log_request_header_on_error) { 287 if ((unsigned char)ch > 32 && ch != 127) { 288 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch); 289 } 290 else { 291 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch); 292 } 293 } 294 return 400; 295 } 296 297 298 __attribute_noinline__ 299 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen) 300 { 301 r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST, 302 CONST_STR_LEN("Host")); 303 buffer_copy_string_len_lc(r->http_host, h, hlen); 304 } 305 306 307 int64_t 308 li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err) 309 { 310 /* base 10 strtoll() parsing exactly vlen chars and requiring digits 0-9 */ 311 /* rejects negative numbers and considers values > INT64_MAX an error */ 312 /* note: errno is not set; detect error if *err != v+vlen upon return */ 313 /*(caller must check 0 == vlen if that is to be an error for caller)*/ 314 int64_t rv = 0; 315 uint32_t i; 316 for (i = 0; i < vlen; ++i) { 317 const uint8_t c = ((uint8_t *)v)[i] - '0'; /*(unsigned; underflow ok)*/ 318 if (c > 9) break; 319 if (rv > INT64_MAX/10) break; 320 rv *= 10; 321 if (rv > INT64_MAX - c) break; 322 rv += c; 323 } 324 *err = v+i; 325 return rv; 326 } 327 328 329 __attribute_cold__ 330 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 331 /* Proxies sometimes send dup headers 332 * if they are the same we ignore the second 333 * if not, we raise an error */ 334 const buffer * const vb = http_header_request_get(r, id, k, klen); 335 if (vb && buffer_eq_icase_slen(vb, v, vlen)) 336 return 0; /* ignore header; matches existing header */ 337 338 const char *errmsg; 339 switch (id) { 340 case HTTP_HEADER_HOST: 341 errmsg = "duplicate Host header -> 400"; 342 break; 343 case HTTP_HEADER_CONTENT_TYPE: 344 errmsg = "duplicate Content-Type header -> 400"; 345 break; 346 case HTTP_HEADER_IF_MODIFIED_SINCE: 347 errmsg = "duplicate If-Modified-Since header -> 400"; 348 break; 349 case HTTP_HEADER_HTTP2_SETTINGS: 350 errmsg = "duplicate HTTP2-Settings header -> 400"; 351 break; 352 default: 353 errmsg = "duplicate header -> 400"; 354 break; 355 case HTTP_HEADER_IF_NONE_MATCH: 356 /* if dup, only the first one will survive */ 357 return 0; /* ignore header */ 358 } 359 return http_request_header_line_invalid(r, 400, errmsg); 360 } 361 362 363 /* add header to list of headers 364 * certain headers are also parsed 365 * might drop a header if deemed unnecessary/broken 366 * 367 * returns 0 on success, HTTP status on error 368 */ 369 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 370 /* 371 * Note: k might not be '\0'-terminated 372 * Note: v is not '\0'-terminated 373 * With lighttpd HTTP/1.1 parser, v ends with whitespace 374 * (one of '\r' '\n' ' ' '\t') 375 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen 376 * (care must be taken to avoid libc funcs which expect z-strings) 377 */ 378 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/ 379 380 switch (id) { 381 /*case HTTP_HEADER_OTHER:*/ 382 default: 383 break; 384 case HTTP_HEADER_HOST: 385 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) { 386 if (vlen >= 1024) { /*(expecting < 256)*/ 387 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400"); 388 } 389 /*(http_request_header_append() plus sets r->http_host)*/ 390 http_request_header_set_Host(r, v, vlen); 391 return 0; 392 } 393 else if (NULL != r->http_host 394 && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) { 395 /* ignore all Host: headers if match authority in request line */ 396 /* (expect Host to match case in :authority of HTTP/2 request) */ 397 return 0; /* ignore header */ 398 } 399 /* else parse duplicate for match or error */ 400 __attribute_fallthrough__ 401 case HTTP_HEADER_IF_MODIFIED_SINCE: 402 case HTTP_HEADER_IF_NONE_MATCH: 403 case HTTP_HEADER_CONTENT_TYPE: 404 case HTTP_HEADER_HTTP2_SETTINGS: 405 if (light_btst(r->rqst_htags, id)) 406 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 407 break; 408 case HTTP_HEADER_CONNECTION: 409 /* "Connection: close" is common case if header is present */ 410 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close"))) 411 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) { 412 r->keep_alive = 0; 413 break; 414 } 415 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){ 416 r->keep_alive = 1; 417 break; 418 } 419 break; 420 case HTTP_HEADER_CONTENT_LENGTH: 421 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 422 /*(trailing whitespace was removed from vlen)*/ 423 /*(not using strtoll() since v might not be z-string)*/ 424 const char *err; 425 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err); 426 if (err == v+vlen) { 427 /* (set only if not set to -1 by Transfer-Encoding: chunked) */ 428 if (0 == r->reqbody_length) r->reqbody_length = clen; 429 } 430 else { 431 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400"); 432 } 433 } 434 else { 435 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400"); 436 } 437 break; 438 case HTTP_HEADER_TRANSFER_ENCODING: 439 if (HTTP_VERSION_1_1 != r->http_version) { 440 return http_request_header_line_invalid(r, 400, 441 HTTP_VERSION_1_0 == r->http_version 442 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400" 443 : "HTTP/2 with Transfer-Encoding is invalid -> 400"); 444 } 445 446 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) { 447 /* Transfer-Encoding might contain additional encodings, 448 * which are not currently supported by lighttpd */ 449 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */ 450 } 451 r->reqbody_length = -1; 452 453 /* Transfer-Encoding is a hop-by-hop header, 454 * which must not be blindly forwarded to backends */ 455 return 0; /* skip header */ 456 } 457 458 http_header_request_append(r, id, k, klen, v, vlen); 459 return 0; 460 } 461 462 __attribute_cold__ 463 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) { 464 const char * proto = memchr(ptr, ' ', len); 465 if (NULL == proto) 466 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 467 proto = memchr(proto+1, ' ', len - (proto+1 - ptr)); 468 if (NULL == proto) 469 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 470 ++proto; 471 472 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') { 473 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) { 474 /* length already checked before calling this routine */ 475 /* (len != (size_t)(proto - ptr + 8)) */ 476 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/ 477 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 478 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0; 479 } 480 else 481 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505"); 482 } 483 else 484 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400"); 485 486 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */ 487 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version); 488 489 return 0; 490 } 491 492 __attribute_cold__ 493 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) { 494 const char *nuri; 495 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7) 496 && NULL != (nuri = memchr(uri + 7, '/', len-7))) 497 || 498 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8) 499 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) { 500 const char * const host = uri + (uri[4] == ':' ? 7 : 8); 501 const size_t hostlen = nuri - host; 502 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/ 503 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400"); 504 return NULL; 505 } 506 /* Insert as "Host" header */ 507 http_request_header_set_Host(r, host, hostlen); 508 return nuri; 509 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/ 510 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0]))) 511 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) { 512 /* (permitted) */ 513 return uri; 514 } else { 515 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400"); 516 return NULL; 517 } 518 } 519 520 521 __attribute_cold__ 522 __attribute_noinline__ 523 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict); 524 525 526 int 527 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts) 528 { 529 /* :method is required to indicate method 530 * CONNECT method must have :method and :authority 531 * All other methods must have at least :method :scheme :path */ 532 533 if (HTTP_METHOD_UNSET == r->http_method) 534 return http_request_header_line_invalid(r, 400, 535 "missing pseudo-header method -> 400"); 536 537 if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) { 538 if (!scheme) 539 return http_request_header_line_invalid(r, 400, 540 "missing pseudo-header scheme -> 400"); 541 542 if (buffer_is_blank(&r->target)) 543 return http_request_header_line_invalid(r, 400, 544 "missing pseudo-header path -> 400"); 545 546 const char * const uri = r->target.ptr; 547 if (*uri != '/') { /* (common case: (*uri == '/')) */ 548 if (uri[0] != '*' || uri[1] != '\0' 549 || HTTP_METHOD_OPTIONS != r->http_method) 550 return http_request_header_line_invalid(r, 400, 551 "invalid pseudo-header path -> 400"); 552 } 553 } 554 else { /* HTTP_METHOD_CONNECT */ 555 if (NULL == r->http_host) 556 return http_request_header_line_invalid(r, 400, 557 "missing pseudo-header authority -> 400"); 558 if (!buffer_is_blank(&r->target) || scheme) 559 return http_request_header_line_invalid(r, 400, 560 "invalid pseudo-header with CONNECT -> 400"); 561 /* note: this copy occurs prior to http_request_host_policy() 562 * so any consumer handling CONNECT should normalize r->target 563 * as appropriate */ 564 buffer_copy_buffer(&r->target, r->http_host); 565 } 566 buffer_copy_buffer(&r->target_orig, &r->target); 567 568 /* r->http_host, if set, is checked with http_request_host_policy() 569 * in http_request_parse() */ 570 571 /* copied and modified from end of http_request_parse_reqline() */ 572 573 /* check uri for invalid characters */ 574 const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/ 575 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 576 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 577 ? NULL /* URI will be checked in http_request_parse_target() */ 578 : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len) 579 : http_request_check_line_minimal(r->target.ptr, len); 580 return (NULL == x) 581 ? 0 582 : http_request_header_char_invalid(r, *x, 583 "invalid character in URI -> 400"); 584 } 585 586 587 int 588 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx) 589 { 590 /* Note: k and v might not be '\0' terminated strings; 591 * care must be taken to avoid libc funcs which expect z-strings */ 592 const char * const restrict k = hpctx->k; 593 const char * const restrict v = hpctx->v; 594 const uint32_t klen = hpctx->klen; 595 const uint32_t vlen = hpctx->vlen; 596 597 if (0 == klen) 598 return http_request_header_line_invalid(r, 400, 599 "invalid header key -> 400"); 600 601 if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) { 602 /*(configurable with server.max-request-field-size; default 8k)*/ 603 #if 1 /* emit to error log for people sending large headers */ 604 log_error(r->conf.errh, __FILE__, __LINE__, 605 "oversized request header -> 431"); 606 return 431; /* Request Header Fields Too Large */ 607 #else 608 /* 431 Request Header Fields Too Large */ 609 return http_request_header_line_invalid(r, 431, 610 "oversized request header -> 431"); 611 #endif 612 } 613 614 if (!hpctx->trailers) { 615 if (*k == ':') { 616 /* HTTP/2 request pseudo-header fields */ 617 if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/ 618 return http_request_header_line_invalid(r, 400, 619 "invalid pseudo-header -> 400"); 620 if (0 == vlen) 621 return http_request_header_line_invalid(r, 400, 622 "invalid header value -> 400"); 623 624 /* (note: relies on implementation details using ls-hpack in h2.c) 625 * (hpctx->id mapped from lsxpack_header_t hpack_index, which only 626 * matches key, not also value, if lsxpack_header_t flags does not 627 * have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET 628 * below indicates any method, not only "GET") */ 629 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 630 switch (klen-1) { 631 case 4: 632 if (0 == memcmp(k+1, "path", 4)) 633 hpctx->id = HTTP_HEADER_H2_PATH; 634 break; 635 case 6: 636 if (0 == memcmp(k+1, "method", 6)) 637 hpctx->id = HTTP_HEADER_H2_METHOD_GET; 638 else if (0 == memcmp(k+1, "scheme", 6)) 639 hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP; 640 break; 641 case 9: 642 if (0 == memcmp(k+1, "authority", 9)) 643 hpctx->id = HTTP_HEADER_H2_AUTHORITY; 644 break; 645 default: 646 break; 647 } 648 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN) 649 return http_request_header_line_invalid(r, 400, 650 "invalid pseudo-header -> 400"); 651 } 652 653 switch (hpctx->id) { 654 case HTTP_HEADER_H2_AUTHORITY: 655 if (__builtin_expect( (r->http_host != NULL), 0)) 656 break; 657 if (vlen >= 1024) /*(expecting < 256)*/ 658 return http_request_header_line_invalid(r, 400, 659 "invalid pseudo-header authority too long -> 400"); 660 /* insert as "Host" header */ 661 http_request_header_set_Host(r, v, vlen); 662 return 0; 663 case HTTP_HEADER_H2_METHOD_GET: /*(any method, not only "GET")*/ 664 case HTTP_HEADER_H2_METHOD_POST: 665 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0)) 666 break; 667 r->http_method = get_http_method_key(v, vlen); 668 if (HTTP_METHOD_UNSET >= r->http_method) 669 return http_request_header_line_invalid(r, 501, 670 "unknown http-method -> 501"); 671 return 0; 672 case HTTP_HEADER_H2_PATH: /*(any path, not only "/")*/ 673 case HTTP_HEADER_H2_PATH_INDEX_HTML: 674 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0)) 675 break; 676 buffer_copy_string_len(&r->target, v, vlen); 677 return 0; 678 case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/ 679 case HTTP_HEADER_H2_SCHEME_HTTPS: 680 if (__builtin_expect( (hpctx->scheme), 0)) 681 break; 682 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/ 683 return 0; 684 #if 0 685 switch (vlen) {/*(validated, but then ignored)*/ 686 case 5: /* "https" */ 687 if (v[4]!='s') break; 688 __attribute_fallthrough__ 689 case 4: /* "http" */ 690 if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') { 691 hpctx->scheme = 1; 692 return 0; 693 } 694 break; 695 default: 696 break; 697 } 698 return http_request_header_line_invalid(r, 400, 699 "unknown pseudo-header scheme -> 400"); 700 #endif 701 default: 702 return http_request_header_line_invalid(r, 400, 703 "invalid pseudo-header -> 400"); 704 } 705 return http_request_header_line_invalid(r, 400, 706 "repeated pseudo-header -> 400"); 707 } 708 else { /*(non-pseudo headers)*/ 709 if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/ 710 hpctx->pseudo = 0; 711 int status = 712 http_request_validate_pseudohdrs(r, hpctx->scheme, 713 hpctx->http_parseopts); 714 if (0 != status) return status; 715 } 716 if (0 == vlen) 717 return 0; 718 719 const unsigned int http_header_strict = 720 (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 721 722 const char * const x = (http_header_strict) 723 ? http_request_check_line_strict(v, vlen) 724 : http_request_check_line_minimal(v, vlen); 725 if (x) 726 return http_request_header_char_invalid(r, *x, 727 "invalid character in header -> 400"); 728 729 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 730 uint32_t j = 0; 731 while (j < klen && (light_islower(k[j]) || k[j] == '-')) 732 ++j; 733 734 if (__builtin_expect( (j != klen), 0)) { 735 if (light_isupper(k[j])) 736 return 400; 737 if (0 != http_request_parse_header_other(r, k+j, klen-j, 738 http_header_strict)) 739 return 400; 740 } 741 742 hpctx->id = http_header_hkey_get_lc(k, klen); 743 } 744 745 const enum http_header_e id = (enum http_header_e)hpctx->id; 746 747 if (__builtin_expect( (id == HTTP_HEADER_TE), 0) 748 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers"))) 749 return http_request_header_line_invalid(r, 400, 750 "invalid TE header value with HTTP/2 -> 400"); 751 752 return http_request_parse_single_header(r, id, k, klen, v, vlen); 753 } 754 } 755 else { /*(trailers)*/ 756 if (*k == ':') 757 return http_request_header_line_invalid(r, 400, 758 "invalid pseudo-header in trailers -> 400"); 759 /* ignore trailers (after required HPACK decoding) if streaming 760 * request body to backend since headers have already been sent 761 * to backend via Common Gateway Interface (CGI) (CGI, FastCGI, 762 * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently 763 * support using HTTP/2 to connect to backends) */ 764 #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/ 765 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST) 766 return 0; 767 #endif 768 /* Note: do not unconditionally merge into headers since if 769 * headers had already been sent to backend, then mod_accesslog 770 * logging of request headers might be inaccurate. 771 * Many simple backends do not support HTTP/1.1 requests sending 772 * Transfer-Encoding: chunked, and even those that do might not 773 * handle trailers. Some backends do not even support HTTP/1.1. 774 * For all these reasons, ignore trailers if streaming request 775 * body to backend. Revisit in future if adding support for 776 * connecting to backends using HTTP/2 (with explicit config 777 * option to force connecting to backends using HTTP/2) */ 778 779 /* XXX: TODO: request trailers not handled if streaming reqbody 780 * XXX: must ensure that trailers are not disallowed field-names 781 */ 782 783 #if 0 784 if (0 == vlen) 785 return 0; 786 #endif 787 788 return 0; 789 } 790 } 791 792 793 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 794 size_t len = hoff[2]; 795 796 /* parse the first line of the request 797 * <method> <uri> <protocol>\r\n 798 * */ 799 if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */ 800 return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400"); 801 if (ptr[len-2] == '\r') 802 len-=2; 803 else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/ 804 len-=1; 805 else 806 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 807 808 /* 809 * RFC7230: 810 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT 811 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 812 */ 813 814 /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */ 815 union proto_un { 816 char c[8]; 817 uint64_t u; 818 }; 819 static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}}; 820 static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}}; 821 const char *p = ptr + len - 8; 822 union proto_un proto8; 823 proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3]; 824 proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7]; 825 if (p[-1] == ' ' && http_1_1.u == proto8.u) { 826 r->http_version = HTTP_VERSION_1_1; 827 r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */ 828 } 829 else if (p[-1] == ' ' && http_1_0.u == proto8.u) { 830 r->http_version = HTTP_VERSION_1_0; 831 r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */ 832 } 833 else { 834 int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts); 835 if (0 != status) return status; 836 /*(space char must exist if http_request_parse_proto_loose() succeeds)*/ 837 for (p = ptr + len - 9; p[-1] != ' '; --p) ; 838 } 839 840 /* method is expected to be a short string in the general case */ 841 size_t i = 0; 842 while (ptr[i] != ' ') ++i; 843 #if 0 /*(space must exist if protocol was parsed successfully)*/ 844 while (i < len && ptr[i] != ' ') ++i; 845 if (ptr[i] != ' ') 846 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 847 #endif 848 849 r->http_method = get_http_method_key(ptr, i); 850 if (HTTP_METHOD_UNSET >= r->http_method) 851 return http_request_header_line_invalid(r, 501, "unknown http-method -> 501"); 852 853 const char *uri = ptr + i + 1; 854 855 if (uri == p) 856 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 857 len = (size_t)(p - uri - 1); 858 859 if (*uri != '/') { /* (common case: (*uri == '/')) */ 860 uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts); 861 if (NULL == uri) return 400; 862 len = (size_t)(p - uri - 1); 863 } 864 865 if (0 == len) 866 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 867 868 /* check uri for invalid characters */ /* http_header_strict */ 869 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 870 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 871 ? NULL /* URI will be checked in http_request_parse_target() */ 872 : http_request_check_uri_strict((const uint8_t *)uri, len) 873 : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */ 874 if (x) 875 http_request_header_char_invalid(r, *x, "invalid character in URI -> 400"); 876 877 buffer_copy_string_len(&r->target, uri, len); 878 buffer_copy_string_len(&r->target_orig, uri, len); 879 return 0; 880 } 881 882 int http_request_parse_target(request_st * const r, int scheme_port) { 883 /* URI is parsed into components at start of request and may 884 * also be re-parsed upon HANDLER_COMEBACK during the request 885 * r->target is expected to be a "/url-part?query-part" 886 * (and *not* a fully-qualified URI starting https://...) 887 * r->uri.authority is expected to be parsed elsewhere into r->http_host 888 */ 889 890 /** 891 * prepare strings 892 * 893 * - uri.path 894 * - uri.query 895 * 896 */ 897 898 /** 899 * Name according to RFC 2396 900 * 901 * - scheme 902 * - authority 903 * - path 904 * - query 905 * 906 * (scheme)://(authority)(path)?(query)#fragment 907 * 908 */ 909 910 /* take initial scheme value from connection-level state 911 * (request r->uri.scheme can be overwritten for later, 912 * for example by mod_extforward or mod_magnet) */ 913 buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4); 914 915 buffer * const target = &r->target; 916 if (r->http_method == HTTP_METHOD_CONNECT 917 || (r->http_method == HTTP_METHOD_OPTIONS 918 && target->ptr[0] == '*' 919 && target->ptr[1] == '\0')) { 920 /* CONNECT ... (or) OPTIONS * ... */ 921 buffer_copy_buffer(&r->uri.path, target); 922 buffer_clear(&r->uri.query); 923 return 0; 924 } 925 926 char *qstr; 927 if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { 928 /*uint32_t len = buffer_clen(target);*/ 929 int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts); 930 if (-2 == qs) 931 return http_request_header_line_invalid(r, 400, 932 "invalid character in URI -> 400"); /* Bad Request */ 933 qstr = (-1 == qs) ? NULL : target->ptr+qs; 934 #if 0 /* future: might enable here, or below for all requests */ 935 /* (Note: total header size not recalculated on HANDLER_COMEBACK 936 * even if other request headers changed during processing) 937 * (If (0 != r->loops_per_request), then the generated 938 * request is too large. Should a different error be returned?) */ 939 r->rqst_header_len -= len; 940 len = buffer_clen(target); 941 r->rqst_header_len += len; 942 if (len > MAX_HTTP_REQUEST_URI) { 943 return 414; /* 414 URI Too Long */ 944 } 945 if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) { 946 log_error(r->conf.errh, __FILE__, __LINE__, 947 "request header fields too large: %u -> 431", 948 r->rqst_header_len); 949 return 431; /* Request Header Fields Too Large */ 950 } 951 #endif 952 } 953 else { 954 size_t rlen = buffer_clen(target); 955 qstr = memchr(target->ptr, '#', rlen);/* discard fragment */ 956 if (qstr) { 957 rlen = (size_t)(qstr - target->ptr); 958 buffer_truncate(target, rlen); 959 } 960 qstr = memchr(target->ptr, '?', rlen); 961 } 962 963 /** extract query string from target */ 964 const char * const pstr = target->ptr; 965 const uint32_t rlen = buffer_clen(target); 966 uint32_t plen; 967 if (NULL != qstr) { 968 plen = (uint32_t)(qstr - pstr); 969 buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1); 970 } 971 else { 972 plen = rlen; 973 buffer_clear(&r->uri.query); 974 } 975 buffer_copy_string_len(&r->uri.path, pstr, plen); 976 977 /* decode url to path 978 * 979 * - decode url-encodings (e.g. %20 -> ' ') 980 * - remove path-modifiers (e.g. /../) 981 */ 982 983 buffer_urldecode_path(&r->uri.path); 984 buffer_path_simplify(&r->uri.path); 985 if (r->uri.path.ptr[0] != '/') 986 return http_request_header_line_invalid(r, 400, 987 "uri-path does not begin with '/' -> 400"); /* Bad Request */ 988 989 return 0; 990 } 991 992 __attribute_cold__ 993 __attribute_noinline__ 994 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) { 995 for (int i = 0; i < klen; ++i) { 996 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/ 997 /** 998 * 1*<any CHAR except CTLs or separators> 999 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127) 1000 * 1001 */ 1002 switch(k[i]) { 1003 case ' ': 1004 case '\t': 1005 return http_request_header_line_invalid(r, 400, "WS character in key -> 400"); 1006 case '\r': 1007 case '\n': 1008 case '(': 1009 case ')': 1010 case '<': 1011 case '>': 1012 case '@': 1013 case ',': 1014 case ':': 1015 case ';': 1016 case '\\': 1017 case '\"': 1018 case '/': 1019 case '[': 1020 case ']': 1021 case '?': 1022 case '=': 1023 case '{': 1024 case '}': 1025 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1026 default: 1027 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0') 1028 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1029 break; /* ok */ 1030 } 1031 } 1032 return 0; 1033 } 1034 1035 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 1036 const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1037 1038 #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/ 1039 int i = hoff[2]; 1040 1041 if (ptr[i] == ' ' || ptr[i] == '\t') { 1042 return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400"); 1043 } 1044 #endif 1045 1046 for (int i = 2; i < hoff[0]; ++i) { 1047 const char *k = ptr + hoff[i]; 1048 /* one past last line hoff[hoff[0]] is to final "\r\n" */ 1049 char *end = ptr + hoff[i+1]; 1050 1051 const char *colon = memchr(k, ':', end - k); 1052 if (NULL == colon) 1053 return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400"); 1054 1055 const char *v = colon + 1; 1056 1057 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1058 * 3.2.4. Field Parsing 1059 * [...] 1060 * No whitespace is allowed between the header field-name and colon. In 1061 * the past, differences in the handling of such whitespace have led to 1062 * security vulnerabilities in request routing and response handling. A 1063 * server MUST reject any received request message that contains 1064 * whitespace between a header field-name and colon with a response code 1065 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a 1066 * response message before forwarding the message downstream. 1067 */ 1068 /* (line k[-1] is always preceded by a '\n', 1069 * including first header after request-line, 1070 * so no need to check colon != k) */ 1071 if (colon[-1] == ' ' || colon[-1] == '\t') { 1072 if (http_header_strict) { 1073 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400"); 1074 } 1075 else { 1076 /* remove trailing whitespace from key(if !http_header_strict)*/ 1077 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t'); 1078 } 1079 } 1080 1081 const int klen = (int)(colon - k); 1082 if (0 == klen) 1083 return http_request_header_line_invalid(r, 400, "invalid header key -> 400"); 1084 const enum http_header_e id = http_header_hkey_get(k, klen); 1085 1086 if (id == HTTP_HEADER_OTHER) { 1087 for (int j = 0; j < klen; ++j) { 1088 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/ 1089 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict)) 1090 return 400; 1091 break; 1092 } 1093 } 1094 1095 /* remove leading whitespace from value */ 1096 while (*v == ' ' || *v == '\t') ++v; 1097 1098 for (; i+1 <= hoff[0]; ++i) { 1099 end = ptr + hoff[i+1]; 1100 if (end[0] != ' ' && end[0] != '\t') break; 1101 1102 /* line folding */ 1103 #ifdef __COVERITY__ 1104 force_assert(end - k >= 2); 1105 #endif 1106 if (end[-2] == '\r') 1107 end[-2] = ' '; 1108 else if (http_header_strict) 1109 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1110 end[-1] = ' '; 1111 } 1112 #ifdef __COVERITY__ 1113 /*(buf holding k has non-zero request-line, so end[-2] valid)*/ 1114 force_assert(end >= k + 2); 1115 #endif 1116 if (end[-2] == '\r') 1117 --end; 1118 else if (http_header_strict) 1119 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1120 /* remove trailing whitespace from value (+ remove '\r\n') */ 1121 /* (line k[-1] is always preceded by a '\n', 1122 * including first header after request-line, 1123 * so no need to check (end != k)) */ 1124 do { --end; } while (end[-1] == ' ' || end[-1] == '\t'); 1125 1126 const int vlen = (int)(end - v); 1127 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */ 1128 if (vlen <= 0) continue; /* ignore header */ 1129 1130 if (http_header_strict) { 1131 const char * const x = http_request_check_line_strict(v, vlen); 1132 if (x) 1133 return http_request_header_char_invalid(r, *x, 1134 "invalid character in header -> 400"); 1135 } /* else URI already checked in http_request_parse_reqline() for any '\0' */ 1136 1137 int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen); 1138 if (0 != status) return status; 1139 } 1140 1141 return 0; 1142 } 1143 1144 1145 static int 1146 http_request_parse (request_st * const restrict r, const int scheme_port) 1147 { 1148 int status = http_request_parse_target(r, scheme_port); 1149 if (0 != status) return status; 1150 1151 /* post-processing */ 1152 const unsigned int http_parseopts = r->conf.http_parseopts; 1153 1154 /* check hostname field if it is set */ 1155 /*(r->http_host might not be set until after parsing request headers)*/ 1156 if (__builtin_expect( (r->http_host != NULL), 1)) { 1157 if (0 != http_request_host_policy(r->http_host, 1158 http_parseopts, scheme_port)) 1159 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400"); 1160 buffer_copy_buffer(&r->uri.authority, r->http_host); 1161 } 1162 else { 1163 buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN("")); 1164 if (r->http_version >= HTTP_VERSION_1_1) 1165 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400"); 1166 } 1167 1168 if (HTTP_VERSION_1_1 != r->http_version 1169 && (r->rqst_htags 1170 & (light_bshift(HTTP_HEADER_UPGRADE) 1171 |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) { 1172 return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400"); 1173 } 1174 1175 if (0 == r->reqbody_length) { 1176 /* POST requires Content-Length (or Transfer-Encoding) 1177 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1178 if (HTTP_METHOD_POST == r->http_method 1179 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1180 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411"); 1181 } 1182 } 1183 else { 1184 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1185 if (-1 == r->reqbody_length 1186 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1187 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1188 * 3.3.3. Message Body Length 1189 * [...] 1190 * If a message is received with both a Transfer-Encoding and a 1191 * Content-Length header field, the Transfer-Encoding overrides the 1192 * Content-Length. Such a message might indicate an attempt to 1193 * perform request smuggling (Section 9.5) or response splitting 1194 * (Section 9.4) and ought to be handled as an error. A sender MUST 1195 * remove the received Content-Length field prior to forwarding such 1196 * a message downstream. 1197 */ 1198 const unsigned int http_header_strict = 1199 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1200 if (http_header_strict) { 1201 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400"); 1202 } 1203 else { 1204 /* ignore Content-Length */ 1205 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length")); 1206 } 1207 } 1208 if (http_method_get_or_head(r->http_method) 1209 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) { 1210 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400"); 1211 } 1212 } 1213 1214 return 0; 1215 } 1216 1217 1218 static int 1219 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1220 { 1221 /* 1222 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" 1223 * Header : "^([-a-zA-Z]+): (.+)$" 1224 * End : "^$" 1225 */ 1226 1227 int status; 1228 const unsigned int http_parseopts = r->conf.http_parseopts; 1229 1230 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts); 1231 if (0 != status) return status; 1232 1233 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts); 1234 if (0 != status) return status; 1235 1236 return http_request_parse(r, scheme_port); 1237 } 1238 1239 1240 static void 1241 http_request_headers_fin (request_st * const restrict r) 1242 { 1243 if (0 == r->http_status) { 1244 #if 0 1245 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET) 1246 | (1 << COMP_HTTP_SCHEME) 1247 | (1 << COMP_HTTP_HOST) 1248 | (1 << COMP_HTTP_REMOTE_IP) 1249 | (1 << COMP_HTTP_REQUEST_METHOD) 1250 | (1 << COMP_HTTP_URL) 1251 | (1 << COMP_HTTP_QUERY_STRING) 1252 | (1 << COMP_HTTP_REQUEST_HEADER); 1253 #else 1254 /* all config conditions are valid after parsing header 1255 * (set all bits; remove dependency on plugin_config.h) */ 1256 r->conditional_is_valid = ~0u; 1257 #endif 1258 } 1259 else { 1260 r->keep_alive = 0; 1261 r->reqbody_length = 0; 1262 } 1263 } 1264 1265 1266 void 1267 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1268 { 1269 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port); 1270 1271 http_request_headers_fin(r); 1272 1273 if (__builtin_expect( (0 != r->http_status), 0)) { 1274 if (r->conf.log_request_header_on_error) { 1275 /*(http_request_parse_headers() modifies hdrs only to 1276 * undo line-wrapping in-place using spaces)*/ 1277 log_error_multiline(r->conf.errh, __FILE__, __LINE__, 1278 hdrs, r->rqst_header_len, "rqst: "); 1279 } 1280 } 1281 } 1282 1283 1284 void 1285 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port) 1286 { 1287 if (0 == r->http_status) 1288 r->http_status = http_request_parse(r, scheme_port); 1289 1290 if (0 == r->http_status) { 1291 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION)) 1292 r->http_status = http_request_header_line_invalid(r, 400, 1293 "invalid Connection header with HTTP/2 -> 400"); 1294 } 1295 1296 http_request_headers_fin(r); 1297 1298 /* limited; headers not collected into a single buf for HTTP/2 */ 1299 if (__builtin_expect( (0 != r->http_status), 0)) { 1300 if (r->conf.log_request_header_on_error) { 1301 log_error(r->conf.errh, __FILE__, __LINE__, 1302 "request-header:\n:authority: %s\n:method: %s\n:path: %s", 1303 r->http_host ? r->http_host->ptr : "", 1304 http_method_buf(r->http_method)->ptr, 1305 !buffer_is_blank(&r->target) ? r->target.ptr : ""); 1306 } 1307 } 1308 1309 /* ignore Upgrade if using HTTP/2 */ 1310 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)) 1311 http_header_request_unset(r, HTTP_HEADER_UPGRADE, 1312 CONST_STR_LEN("upgrade")); 1313 /* XXX: should filter out other hop-by-hop connection headers, too */ 1314 } 1315