1 /* 2 * request - HTTP request processing 3 * 4 * Fully-rewritten from original 5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved 6 * License: BSD 3-clause (same as lighttpd) 7 */ 8 #include "first.h" 9 10 #include "request.h" 11 #include "burl.h" 12 #include "http_header.h" 13 #include "http_kv.h" 14 #include "log.h" 15 #include "sock_addr.h" 16 17 #include <limits.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 __attribute_noinline__ 23 __attribute_nonnull__() 24 __attribute_pure__ 25 static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) { 26 for (uint_fast32_t i = 0; i < len; ++i) { 27 if (__builtin_expect( (s[i] <= 32), 0)) return (const char *)s+i; 28 if (__builtin_expect( (s[i] == 127), 0)) return (const char *)s+i; 29 if (__builtin_expect( (s[i] == 255), 0)) return (const char *)s+i; 30 } 31 return NULL; 32 } 33 34 __attribute_nonnull__() 35 __attribute_pure__ 36 static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) { 37 for (uint_fast32_t i = 0; i < len; ++i) { 38 if (__builtin_expect( (((const uint8_t *)s)[i]<32), 0) && s[i] != '\t') 39 return s+i; 40 if (__builtin_expect( (s[i] == 127), 0)) 41 return s+i; 42 } 43 return NULL; 44 } 45 46 __attribute_nonnull__() 47 __attribute_pure__ 48 static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) { 49 for (uint_fast32_t i = 0; i < len; ++i) { 50 if (__builtin_expect( (s[i] == '\0'), 0)) return s+i; 51 } 52 return NULL; 53 } 54 55 static int request_check_hostname(buffer * const host) { 56 /* 57 * hostport = host [ ":" port ] 58 * host = hostname | IPv4address | IPv6address 59 * hostname = *( domainlabel "." ) toplabel [ "." ] 60 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 61 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 62 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 63 * IPv6address = "[" ... "]" 64 * port = *digit 65 */ 66 67 const char *h = host->ptr; 68 69 if (*h != '[') { 70 uint32_t len = buffer_clen(host); 71 const char * const colon = memchr(h, ':', len); 72 uint32_t hlen = colon ? (uint32_t)(colon - h) : len; 73 74 /* if hostname ends in ".", strip it */ 75 if (__builtin_expect( (0 == hlen), 0)) return -1; 76 if (__builtin_expect( (h[hlen-1] == '.'), 0)) { 77 /* shift port info one left */ 78 if (--hlen == 0) return -1; 79 --len; 80 if (NULL != colon) 81 memmove(host->ptr+hlen, colon, len - hlen); 82 buffer_truncate(host, len); 83 } 84 85 int label_len = 0; 86 int allnumeric = 1; 87 int numeric = 1; 88 int level = 0; 89 for (uint32_t i = 0; i < hlen; ++i) { 90 const int ch = h[i]; 91 ++label_len; 92 if (light_isdigit(ch)) 93 continue; 94 else if ((light_isalpha(ch) || (ch == '-' && i != 0))) 95 numeric = 0; 96 else if (ch == '.' && 1 != label_len && '-' != h[i+1]) { 97 allnumeric &= numeric; 98 numeric = 1; 99 label_len = 0; 100 ++level; 101 } 102 else 103 return -1; 104 } 105 /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */ 106 if (0 == label_len || (numeric && (level != 3 || !allnumeric))) 107 return -1; 108 109 h += hlen; 110 } 111 else { /* IPv6 address */ 112 /* check the address inside [...]; note: not fully validating */ 113 /* (note: not allowing scoped literals, e.g. %eth0 suffix) */ 114 ++h; /* step past '[' */ 115 int cnt = 0; 116 while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h; 117 /*(invalid char, too many ':', missing ']', or empty "[]")*/ 118 if (*h != ']' || h - host->ptr == 1) return -1; 119 ++h; /* step past ']' */ 120 } 121 122 /* check numerical port, if present */ 123 if (*h == ':') { 124 if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/ 125 buffer_truncate(host, h - host->ptr); 126 do { ++h; } while (light_isdigit(*h)); 127 } 128 129 return (*h == '\0') ? 0 : -1; 130 } 131 132 int http_request_host_normalize(buffer * const b, const int scheme_port) { 133 /* 134 * check for and canonicalize numeric IP address and portnum (optional) 135 * (IP address may be followed by ":portnum" (optional)) 136 * - IPv6: "[...]" 137 * - IPv4: "x.x.x.x" 138 * - IPv4: 12345678 (32-bit decimal number) 139 * - IPv4: 012345678 (32-bit octal number) 140 * - IPv4: 0x12345678 (32-bit hex number) 141 * 142 * allow any chars (except ':' and '\0' and stray '[' or ']') 143 * (other code may check chars more strictly or more pedantically) 144 * ':' delimits (optional) port at end of string 145 * "[]" wraps IPv6 address literal 146 * '\0' should have been rejected earlier were it present 147 * 148 * any chars includes, but is not limited to: 149 * - allow '-' any where, even at beginning of word 150 * (security caution: might be confused for cmd flag if passed to shell) 151 * - allow all-digit TLDs 152 * (might be mistaken for IPv4 addr by inet_aton() 153 * unless non-digits appear in subdomain) 154 */ 155 156 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 157 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. 158 * Not using inet_pton() (when available) on IPv4 for similar reasons. */ 159 160 const char * const p = b->ptr; 161 const size_t blen = buffer_clen(b); 162 long port = 0; 163 164 if (*p != '[') { 165 char * const colon = (char *)memchr(p, ':', blen); 166 if (colon) { 167 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ 168 if (colon[1] != '\0') { 169 char *e; 170 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ 171 if (0 < port && port <= USHRT_MAX && *e == '\0') { 172 /* valid port */ 173 } else { 174 return -1; 175 } 176 } /*(else ignore stray colon at string end)*/ 177 buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/ 178 } 179 180 if (light_isdigit(*p)) do { 181 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ 182 /* (check one-element cache of normalized IPv4 address string) */ 183 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr; 184 size_t n = colon ? (size_t)(colon - p) : blen; 185 sock_addr addr; 186 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break; 187 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) { 188 sock_addr_inet_ntop_copy_buffer(b, &addr); 189 n = buffer_clen(b); 190 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n)); 191 } 192 } while (0); 193 } else do { /* IPv6 addr */ 194 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 195 196 /* (check one-element cache of normalized IPv4 address string) */ 197 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr; 198 sock_addr addr; 199 char *bracket = b->ptr+blen-1; 200 char *percent = strchr(b->ptr+1, '%'); 201 size_t len; 202 int rc; 203 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/ 204 if (blen <= 2) return -1; /*(invalid "[]")*/ 205 if (*bracket != ']') { 206 bracket = (char *)memchr(b->ptr+1, ']', blen-1); 207 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ 208 return -1; 209 } 210 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ 211 char *e; 212 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ 213 if (0 < port && port <= USHRT_MAX && *e == '\0') { 214 /* valid port */ 215 } else { 216 return -1; 217 } 218 } 219 } 220 221 len = (size_t)((percent ? percent : bracket) - (b->ptr+1)); 222 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) { 223 /* truncate after ']' and re-add normalized port, if needed */ 224 buffer_truncate(b, (size_t)(bracket - b->ptr + 1)); 225 break; 226 } 227 228 *bracket = '\0';/*(terminate IPv6 string)*/ 229 if (percent) *percent = '\0'; /*(remove %interface from address)*/ 230 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0); 231 if (percent) *percent = '%'; /*(restore %interface)*/ 232 *bracket = ']'; /*(restore bracket)*/ 233 if (1 != rc) return -1; 234 235 sock_addr_inet_ntop(&addr, buf, sizeof(buf)); 236 len = strlen(buf); 237 if (percent) { 238 if (percent > bracket) return -1; 239 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1; 240 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len)); 241 memcpy(buf+len, percent, (size_t)(bracket - percent)); 242 len += (size_t)(bracket - percent); 243 } 244 buffer_truncate(b, 1); /* truncate after '[' */ 245 buffer_append_str2(b, buf, len, CONST_STR_LEN("]")); 246 247 #else 248 249 return -1; 250 251 #endif 252 } while (0); 253 254 if (0 != port && port != scheme_port) { 255 buffer_append_string_len(b, CONST_STR_LEN(":")); 256 buffer_append_int(b, (int)port); 257 } 258 259 return 0; 260 } 261 262 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) { 263 /* caller should lowercase, as is done in http_request_header_set_Host(), 264 * for consistency in case the value is used prior to calling policy func */ 265 /*buffer_to_lower(b);*/ 266 return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT) 267 ? 0 != request_check_hostname(b) 268 : NULL != http_request_check_line_minimal(BUF_PTR_LEN(b))) 269 || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) 270 && 0 != http_request_host_normalize(b, scheme_port))); 271 } 272 273 __attribute_cold__ 274 __attribute_noinline__ 275 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) { 276 if (r->conf.log_request_header_on_error) { 277 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg); 278 } 279 return status; 280 } 281 282 __attribute_cold__ 283 __attribute_noinline__ 284 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) { 285 if (r->conf.log_request_header_on_error) { 286 if ((unsigned char)ch > 32 && ch != 127) { 287 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch); 288 } 289 else { 290 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch); 291 } 292 } 293 return 400; 294 } 295 296 297 __attribute_noinline__ 298 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen) 299 { 300 r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST, 301 CONST_STR_LEN("Host")); 302 buffer_copy_string_len_lc(r->http_host, h, hlen); 303 } 304 305 306 int64_t 307 li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err) 308 { 309 /* base 10 strtoll() parsing exactly vlen chars and requiring digits 0-9 */ 310 /* rejects negative numbers and considers values > INT64_MAX an error */ 311 /* note: errno is not set; detect error if *err != v+vlen upon return */ 312 /*(caller must check 0 == vlen if that is to be an error for caller)*/ 313 int64_t rv = 0; 314 uint32_t i; 315 for (i = 0; i < vlen; ++i) { 316 const uint8_t c = ((uint8_t *)v)[i] - '0'; /*(unsigned; underflow ok)*/ 317 if (c > 9) break; 318 if (rv > INT64_MAX/10) break; 319 rv *= 10; 320 if (rv > INT64_MAX - c) break; 321 rv += c; 322 } 323 *err = v+i; 324 return rv; 325 } 326 327 328 __attribute_cold__ 329 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 330 /* Proxies sometimes send dup headers 331 * if they are the same we ignore the second 332 * if not, we raise an error */ 333 const buffer * const vb = http_header_request_get(r, id, k, klen); 334 if (vb && buffer_eq_icase_slen(vb, v, vlen)) 335 return 0; /* ignore header; matches existing header */ 336 337 const char *errmsg; 338 switch (id) { 339 case HTTP_HEADER_HOST: 340 errmsg = "duplicate Host header -> 400"; 341 break; 342 case HTTP_HEADER_CONTENT_TYPE: 343 errmsg = "duplicate Content-Type header -> 400"; 344 break; 345 case HTTP_HEADER_IF_MODIFIED_SINCE: 346 errmsg = "duplicate If-Modified-Since header -> 400"; 347 break; 348 default: 349 errmsg = "duplicate header -> 400"; 350 break; 351 } 352 return http_request_header_line_invalid(r, 400, errmsg); 353 } 354 355 356 /* add header to list of headers 357 * certain headers are also parsed 358 * might drop a header if deemed unnecessary/broken 359 * 360 * returns 0 on success, HTTP status on error 361 */ 362 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 363 /* 364 * Note: k might not be '\0'-terminated 365 * Note: v is not '\0'-terminated 366 * With lighttpd HTTP/1.1 parser, v ends with whitespace 367 * (one of '\r' '\n' ' ' '\t') 368 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen 369 * (care must be taken to avoid libc funcs which expect z-strings) 370 */ 371 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/ 372 373 switch (id) { 374 /*case HTTP_HEADER_OTHER:*/ 375 default: 376 break; 377 case HTTP_HEADER_HOST: 378 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) { 379 if (vlen >= 1024) { /*(expecting < 256)*/ 380 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400"); 381 } 382 /*(http_request_header_append() plus sets r->http_host)*/ 383 http_request_header_set_Host(r, v, vlen); 384 return 0; 385 } 386 else if (NULL != r->http_host 387 && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) { 388 /* ignore all Host: headers if match authority in request line */ 389 /* (expect Host to match case in :authority of HTTP/2 request) */ 390 return 0; /* ignore header */ 391 } 392 else { 393 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 394 } 395 break; 396 case HTTP_HEADER_CONNECTION: 397 /* "Connection: close" is common case if header is present */ 398 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close"))) 399 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) { 400 r->keep_alive = 0; 401 break; 402 } 403 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){ 404 r->keep_alive = 1; 405 break; 406 } 407 break; 408 case HTTP_HEADER_CONTENT_TYPE: 409 if (light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_TYPE)) { 410 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 411 } 412 break; 413 case HTTP_HEADER_IF_NONE_MATCH: 414 /* if dup, only the first one will survive */ 415 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_NONE_MATCH)) { 416 return 0; /* ignore header */ 417 } 418 break; 419 case HTTP_HEADER_CONTENT_LENGTH: 420 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 421 /*(trailing whitespace was removed from vlen)*/ 422 /*(not using strtoll() since v might not be z-string)*/ 423 const char *err; 424 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err); 425 if (err == v+vlen) { 426 /* (set only if not set to -1 by Transfer-Encoding: chunked) */ 427 if (0 == r->reqbody_length) r->reqbody_length = clen; 428 } 429 else { 430 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400"); 431 } 432 } 433 else { 434 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400"); 435 } 436 break; 437 case HTTP_HEADER_HTTP2_SETTINGS: 438 if (light_btst(r->rqst_htags, HTTP_HEADER_HTTP2_SETTINGS)) { 439 return http_request_header_line_invalid(r, 400, "duplicate HTTP2-Settings header -> 400"); 440 } 441 break; 442 case HTTP_HEADER_IF_MODIFIED_SINCE: 443 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_MODIFIED_SINCE)) { 444 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 445 } 446 break; 447 case HTTP_HEADER_TRANSFER_ENCODING: 448 if (HTTP_VERSION_1_1 != r->http_version) { 449 return http_request_header_line_invalid(r, 400, 450 HTTP_VERSION_1_0 == r->http_version 451 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400" 452 : "HTTP/2 with Transfer-Encoding is invalid -> 400"); 453 } 454 455 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) { 456 /* Transfer-Encoding might contain additional encodings, 457 * which are not currently supported by lighttpd */ 458 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */ 459 } 460 r->reqbody_length = -1; 461 462 /* Transfer-Encoding is a hop-by-hop header, 463 * which must not be blindly forwarded to backends */ 464 return 0; /* skip header */ 465 } 466 467 http_header_request_append(r, id, k, klen, v, vlen); 468 return 0; 469 } 470 471 __attribute_cold__ 472 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) { 473 const char * proto = memchr(ptr, ' ', len); 474 if (NULL == proto) 475 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 476 proto = memchr(proto+1, ' ', len - (proto+1 - ptr)); 477 if (NULL == proto) 478 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 479 ++proto; 480 481 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') { 482 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) { 483 /* length already checked before calling this routine */ 484 /* (len != (size_t)(proto - ptr + 8)) */ 485 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/ 486 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 487 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0; 488 } 489 else 490 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505"); 491 } 492 else 493 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400"); 494 495 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */ 496 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version); 497 498 return 0; 499 } 500 501 __attribute_cold__ 502 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) { 503 const char *nuri; 504 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7) 505 && NULL != (nuri = memchr(uri + 7, '/', len-7))) 506 || 507 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8) 508 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) { 509 const char * const host = uri + (uri[4] == ':' ? 7 : 8); 510 const size_t hostlen = nuri - host; 511 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/ 512 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400"); 513 return NULL; 514 } 515 /* Insert as "Host" header */ 516 http_request_header_set_Host(r, host, hostlen); 517 return nuri; 518 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/ 519 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0]))) 520 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) { 521 /* (permitted) */ 522 return uri; 523 } else { 524 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400"); 525 return NULL; 526 } 527 } 528 529 530 __attribute_cold__ 531 __attribute_noinline__ 532 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict); 533 534 535 int 536 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts) 537 { 538 /* :method is required to indicate method 539 * CONNECT method must have :method and :authority 540 * All other methods must have at least :method :scheme :path */ 541 542 if (HTTP_METHOD_UNSET == r->http_method) 543 return http_request_header_line_invalid(r, 400, 544 "missing pseudo-header method -> 400"); 545 546 if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1)) { 547 if (!scheme) 548 return http_request_header_line_invalid(r, 400, 549 "missing pseudo-header scheme -> 400"); 550 551 if (buffer_is_blank(&r->target)) 552 return http_request_header_line_invalid(r, 400, 553 "missing pseudo-header path -> 400"); 554 555 const char * const uri = r->target.ptr; 556 if (*uri != '/') { /* (common case: (*uri == '/')) */ 557 if (uri[0] != '*' || uri[1] != '\0' 558 || HTTP_METHOD_OPTIONS != r->http_method) 559 return http_request_header_line_invalid(r, 400, 560 "invalid pseudo-header path -> 400"); 561 } 562 } 563 else { /* HTTP_METHOD_CONNECT */ 564 if (NULL == r->http_host) 565 return http_request_header_line_invalid(r, 400, 566 "missing pseudo-header authority -> 400"); 567 if (!buffer_is_blank(&r->target) || scheme) 568 return http_request_header_line_invalid(r, 400, 569 "invalid pseudo-header with CONNECT -> 400"); 570 /* note: this copy occurs prior to http_request_host_policy() 571 * so any consumer handling CONNECT should normalize r->target 572 * as appropriate */ 573 buffer_copy_buffer(&r->target, r->http_host); 574 } 575 buffer_copy_buffer(&r->target_orig, &r->target); 576 577 /* r->http_host, if set, is checked with http_request_host_policy() 578 * in http_request_parse() */ 579 580 /* copied and modified from end of http_request_parse_reqline() */ 581 582 /* check uri for invalid characters */ 583 const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/ 584 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 585 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 586 ? NULL /* URI will be checked in http_request_parse_target() */ 587 : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len) 588 : http_request_check_line_minimal(r->target.ptr, len); 589 return (NULL == x) 590 ? 0 591 : http_request_header_char_invalid(r, *x, 592 "invalid character in URI -> 400"); 593 } 594 595 596 int 597 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx) 598 { 599 /* Note: k and v might not be '\0' terminated strings; 600 * care must be taken to avoid libc funcs which expect z-strings */ 601 const char * const restrict k = hpctx->k; 602 const char * const restrict v = hpctx->v; 603 const uint32_t klen = hpctx->klen; 604 const uint32_t vlen = hpctx->vlen; 605 606 if (0 == klen) 607 return http_request_header_line_invalid(r, 400, 608 "invalid header key -> 400"); 609 610 if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) { 611 /*(configurable with server.max-request-field-size; default 8k)*/ 612 #if 1 /* emit to error log for people sending large headers */ 613 log_error(r->conf.errh, __FILE__, __LINE__, 614 "oversized request header -> 431"); 615 return 431; /* Request Header Fields Too Large */ 616 #else 617 /* 431 Request Header Fields Too Large */ 618 return http_request_header_line_invalid(r, 431, 619 "oversized request header -> 431"); 620 #endif 621 } 622 623 if (!hpctx->trailers) { 624 if (*k == ':') { 625 /* HTTP/2 request pseudo-header fields */ 626 if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/ 627 return http_request_header_line_invalid(r, 400, 628 "invalid pseudo-header -> 400"); 629 if (0 == vlen) 630 return http_request_header_line_invalid(r, 400, 631 "invalid header value -> 400"); 632 633 /* (note: relies on implementation details using ls-hpack in h2.c) 634 * (hpctx->id mapped from lsxpack_header_t hpack_index, which only 635 * matches key, not also value, if lsxpack_header_t flags does not 636 * have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET 637 * below indicates any method, not only "GET") */ 638 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 639 switch (klen-1) { 640 case 4: 641 if (0 == memcmp(k+1, "path", 4)) 642 hpctx->id = HTTP_HEADER_H2_PATH; 643 break; 644 case 6: 645 if (0 == memcmp(k+1, "method", 6)) 646 hpctx->id = HTTP_HEADER_H2_METHOD_GET; 647 else if (0 == memcmp(k+1, "scheme", 6)) 648 hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP; 649 break; 650 case 9: 651 if (0 == memcmp(k+1, "authority", 9)) 652 hpctx->id = HTTP_HEADER_H2_AUTHORITY; 653 break; 654 default: 655 break; 656 } 657 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN) 658 return http_request_header_line_invalid(r, 400, 659 "invalid pseudo-header -> 400"); 660 } 661 662 switch (hpctx->id) { 663 case HTTP_HEADER_H2_AUTHORITY: 664 if (__builtin_expect( (r->http_host != NULL), 0)) 665 break; 666 if (vlen >= 1024) /*(expecting < 256)*/ 667 return http_request_header_line_invalid(r, 400, 668 "invalid pseudo-header authority too long -> 400"); 669 /* insert as "Host" header */ 670 http_request_header_set_Host(r, v, vlen); 671 return 0; 672 case HTTP_HEADER_H2_METHOD_GET: /*(any method, not only "GET")*/ 673 case HTTP_HEADER_H2_METHOD_POST: 674 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0)) 675 break; 676 r->http_method = get_http_method_key(v, vlen); 677 if (HTTP_METHOD_UNSET >= r->http_method) 678 return http_request_header_line_invalid(r, 501, 679 "unknown http-method -> 501"); 680 return 0; 681 case HTTP_HEADER_H2_PATH: /*(any path, not only "/")*/ 682 case HTTP_HEADER_H2_PATH_INDEX_HTML: 683 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0)) 684 break; 685 buffer_copy_string_len(&r->target, v, vlen); 686 return 0; 687 case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/ 688 case HTTP_HEADER_H2_SCHEME_HTTPS: 689 if (__builtin_expect( (hpctx->scheme), 0)) 690 break; 691 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/ 692 return 0; 693 #if 0 694 switch (vlen) {/*(validated, but then ignored)*/ 695 case 5: /* "https" */ 696 if (v[4]!='s') break; 697 __attribute_fallthrough__ 698 case 4: /* "http" */ 699 if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') { 700 hpctx->scheme = 1; 701 return 0; 702 } 703 break; 704 default: 705 break; 706 } 707 return http_request_header_line_invalid(r, 400, 708 "unknown pseudo-header scheme -> 400"); 709 #endif 710 default: 711 return http_request_header_line_invalid(r, 400, 712 "invalid pseudo-header -> 400"); 713 } 714 return http_request_header_line_invalid(r, 400, 715 "repeated pseudo-header -> 400"); 716 } 717 else { /*(non-pseudo headers)*/ 718 if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/ 719 hpctx->pseudo = 0; 720 int status = 721 http_request_validate_pseudohdrs(r, hpctx->scheme, 722 hpctx->http_parseopts); 723 if (0 != status) return status; 724 } 725 if (0 == vlen) 726 return 0; 727 728 const unsigned int http_header_strict = 729 (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 730 731 const char * const x = (http_header_strict) 732 ? http_request_check_line_strict(v, vlen) 733 : http_request_check_line_minimal(v, vlen); 734 if (x) 735 return http_request_header_char_invalid(r, *x, 736 "invalid character in header -> 400"); 737 738 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 739 uint32_t j = 0; 740 while (j < klen && (light_islower(k[j]) || k[j] == '-')) 741 ++j; 742 743 if (__builtin_expect( (j != klen), 0)) { 744 if (light_isupper(k[j])) 745 return 400; 746 if (0 != http_request_parse_header_other(r, k+j, klen-j, 747 http_header_strict)) 748 return 400; 749 } 750 751 hpctx->id = http_header_hkey_get_lc(k, klen); 752 } 753 754 const enum http_header_e id = (enum http_header_e)hpctx->id; 755 756 if (__builtin_expect( (id == HTTP_HEADER_TE), 0) 757 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers"))) 758 return http_request_header_line_invalid(r, 400, 759 "invalid TE header value with HTTP/2 -> 400"); 760 761 return http_request_parse_single_header(r, id, k, klen, v, vlen); 762 } 763 } 764 else { /*(trailers)*/ 765 if (*k == ':') 766 return http_request_header_line_invalid(r, 400, 767 "invalid pseudo-header in trailers -> 400"); 768 /* ignore trailers (after required HPACK decoding) if streaming 769 * request body to backend since headers have already been sent 770 * to backend via Common Gateway Interface (CGI) (CGI, FastCGI, 771 * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently 772 * support using HTTP/2 to connect to backends) */ 773 #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/ 774 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST) 775 return 0; 776 #endif 777 /* Note: do not unconditionally merge into headers since if 778 * headers had already been sent to backend, then mod_accesslog 779 * logging of request headers might be inaccurate. 780 * Many simple backends do not support HTTP/1.1 requests sending 781 * Transfer-Encoding: chunked, and even those that do might not 782 * handle trailers. Some backends do not even support HTTP/1.1. 783 * For all these reasons, ignore trailers if streaming request 784 * body to backend. Revisit in future if adding support for 785 * connecting to backends using HTTP/2 (with explicit config 786 * option to force connecting to backends using HTTP/2) */ 787 788 /* XXX: TODO: request trailers not handled if streaming reqbody 789 * XXX: must ensure that trailers are not disallowed field-names 790 */ 791 792 #if 0 793 if (0 == vlen) 794 return 0; 795 #endif 796 797 return 0; 798 } 799 } 800 801 802 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 803 size_t len = hoff[2]; 804 805 /* parse the first line of the request 806 * <method> <uri> <protocol>\r\n 807 * */ 808 if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */ 809 return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400"); 810 if (ptr[len-2] == '\r') 811 len-=2; 812 else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/ 813 len-=1; 814 else 815 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 816 817 /* 818 * RFC7230: 819 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT 820 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 821 */ 822 823 /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */ 824 union proto_un { 825 char c[8]; 826 uint64_t u; 827 }; 828 static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}}; 829 static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}}; 830 const char *p = ptr + len - 8; 831 union proto_un proto8; 832 proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3]; 833 proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7]; 834 if (p[-1] == ' ' && http_1_1.u == proto8.u) { 835 r->http_version = HTTP_VERSION_1_1; 836 r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */ 837 } 838 else if (p[-1] == ' ' && http_1_0.u == proto8.u) { 839 r->http_version = HTTP_VERSION_1_0; 840 r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */ 841 } 842 else { 843 int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts); 844 if (0 != status) return status; 845 /*(space char must exist if http_request_parse_proto_loose() succeeds)*/ 846 for (p = ptr + len - 9; p[-1] != ' '; --p) ; 847 } 848 849 /* method is expected to be a short string in the general case */ 850 size_t i = 0; 851 while (ptr[i] != ' ') ++i; 852 #if 0 /*(space must exist if protocol was parsed successfully)*/ 853 while (i < len && ptr[i] != ' ') ++i; 854 if (ptr[i] != ' ') 855 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 856 #endif 857 858 r->http_method = get_http_method_key(ptr, i); 859 if (HTTP_METHOD_UNSET >= r->http_method) 860 return http_request_header_line_invalid(r, 501, "unknown http-method -> 501"); 861 862 const char *uri = ptr + i + 1; 863 864 if (uri == p) 865 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 866 len = (size_t)(p - uri - 1); 867 868 if (*uri != '/') { /* (common case: (*uri == '/')) */ 869 uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts); 870 if (NULL == uri) return 400; 871 len = (size_t)(p - uri - 1); 872 } 873 874 if (0 == len) 875 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 876 877 /* check uri for invalid characters */ /* http_header_strict */ 878 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 879 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 880 ? NULL /* URI will be checked in http_request_parse_target() */ 881 : http_request_check_uri_strict((const uint8_t *)uri, len) 882 : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */ 883 if (x) 884 http_request_header_char_invalid(r, *x, "invalid character in URI -> 400"); 885 886 buffer_copy_string_len(&r->target, uri, len); 887 buffer_copy_string_len(&r->target_orig, uri, len); 888 return 0; 889 } 890 891 int http_request_parse_target(request_st * const r, int scheme_port) { 892 /* URI is parsed into components at start of request and may 893 * also be re-parsed upon HANDLER_COMEBACK during the request 894 * r->target is expected to be a "/url-part?query-part" 895 * (and *not* a fully-qualified URI starting https://...) 896 * r->uri.authority is expected to be parsed elsewhere into r->http_host 897 */ 898 899 /** 900 * prepare strings 901 * 902 * - uri.path 903 * - uri.query 904 * 905 */ 906 907 /** 908 * Name according to RFC 2396 909 * 910 * - scheme 911 * - authority 912 * - path 913 * - query 914 * 915 * (scheme)://(authority)(path)?(query)#fragment 916 * 917 */ 918 919 /* take initial scheme value from connection-level state 920 * (request r->uri.scheme can be overwritten for later, 921 * for example by mod_extforward or mod_magnet) */ 922 buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4); 923 924 buffer * const target = &r->target; 925 if (r->http_method == HTTP_METHOD_CONNECT 926 || (r->http_method == HTTP_METHOD_OPTIONS 927 && target->ptr[0] == '*' 928 && target->ptr[1] == '\0')) { 929 /* CONNECT ... (or) OPTIONS * ... */ 930 buffer_copy_buffer(&r->uri.path, target); 931 buffer_clear(&r->uri.query); 932 return 0; 933 } 934 935 char *qstr; 936 if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { 937 /*uint32_t len = buffer_clen(target);*/ 938 int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts); 939 if (-2 == qs) 940 return http_request_header_line_invalid(r, 400, 941 "invalid character in URI -> 400"); /* Bad Request */ 942 qstr = (-1 == qs) ? NULL : target->ptr+qs; 943 #if 0 /* future: might enable here, or below for all requests */ 944 /* (Note: total header size not recalculated on HANDLER_COMEBACK 945 * even if other request headers changed during processing) 946 * (If (0 != r->loops_per_request), then the generated 947 * request is too large. Should a different error be returned?) */ 948 r->rqst_header_len -= len; 949 len = buffer_clen(target); 950 r->rqst_header_len += len; 951 if (len > MAX_HTTP_REQUEST_URI) { 952 return 414; /* 414 URI Too Long */ 953 } 954 if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) { 955 log_error(r->conf.errh, __FILE__, __LINE__, 956 "request header fields too large: %u -> 431", 957 r->rqst_header_len); 958 return 431; /* Request Header Fields Too Large */ 959 } 960 #endif 961 } 962 else { 963 size_t rlen = buffer_clen(target); 964 qstr = memchr(target->ptr, '#', rlen);/* discard fragment */ 965 if (qstr) { 966 rlen = (size_t)(qstr - target->ptr); 967 buffer_truncate(target, rlen); 968 } 969 qstr = memchr(target->ptr, '?', rlen); 970 } 971 972 /** extract query string from target */ 973 const char * const pstr = target->ptr; 974 const uint32_t rlen = buffer_clen(target); 975 uint32_t plen; 976 if (NULL != qstr) { 977 plen = (uint32_t)(qstr - pstr); 978 buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1); 979 } 980 else { 981 plen = rlen; 982 buffer_clear(&r->uri.query); 983 } 984 buffer_copy_string_len(&r->uri.path, pstr, plen); 985 986 /* decode url to path 987 * 988 * - decode url-encodings (e.g. %20 -> ' ') 989 * - remove path-modifiers (e.g. /../) 990 */ 991 992 buffer_urldecode_path(&r->uri.path); 993 buffer_path_simplify(&r->uri.path); 994 if (r->uri.path.ptr[0] != '/') 995 return http_request_header_line_invalid(r, 400, 996 "uri-path does not begin with '/' -> 400"); /* Bad Request */ 997 998 return 0; 999 } 1000 1001 __attribute_cold__ 1002 __attribute_noinline__ 1003 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) { 1004 for (int i = 0; i < klen; ++i) { 1005 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/ 1006 /** 1007 * 1*<any CHAR except CTLs or separators> 1008 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127) 1009 * 1010 */ 1011 switch(k[i]) { 1012 case ' ': 1013 case '\t': 1014 return http_request_header_line_invalid(r, 400, "WS character in key -> 400"); 1015 case '(': 1016 case ')': 1017 case '<': 1018 case '>': 1019 case '@': 1020 case ',': 1021 case ';': 1022 case '\\': 1023 case '\"': 1024 case '/': 1025 case '[': 1026 case ']': 1027 case '?': 1028 case '=': 1029 case '{': 1030 case '}': 1031 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1032 default: 1033 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0') 1034 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1035 break; /* ok */ 1036 } 1037 } 1038 return 0; 1039 } 1040 1041 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 1042 const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1043 1044 #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/ 1045 int i = hoff[2]; 1046 1047 if (ptr[i] == ' ' || ptr[i] == '\t') { 1048 return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400"); 1049 } 1050 #endif 1051 1052 for (int i = 2; i < hoff[0]; ++i) { 1053 const char *k = ptr + hoff[i]; 1054 /* one past last line hoff[hoff[0]] is to final "\r\n" */ 1055 char *end = ptr + hoff[i+1]; 1056 1057 const char *colon = memchr(k, ':', end - k); 1058 if (NULL == colon) 1059 return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400"); 1060 1061 const char *v = colon + 1; 1062 1063 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1064 * 3.2.4. Field Parsing 1065 * [...] 1066 * No whitespace is allowed between the header field-name and colon. In 1067 * the past, differences in the handling of such whitespace have led to 1068 * security vulnerabilities in request routing and response handling. A 1069 * server MUST reject any received request message that contains 1070 * whitespace between a header field-name and colon with a response code 1071 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a 1072 * response message before forwarding the message downstream. 1073 */ 1074 /* (line k[-1] is always preceded by a '\n', 1075 * including first header after request-line, 1076 * so no need to check colon != k) */ 1077 if (colon[-1] == ' ' || colon[-1] == '\t') { 1078 if (http_header_strict) { 1079 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400"); 1080 } 1081 else { 1082 /* remove trailing whitespace from key(if !http_header_strict)*/ 1083 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t'); 1084 } 1085 } 1086 1087 const int klen = (int)(colon - k); 1088 if (0 == klen) 1089 return http_request_header_line_invalid(r, 400, "invalid header key -> 400"); 1090 const enum http_header_e id = http_header_hkey_get(k, klen); 1091 1092 if (id == HTTP_HEADER_OTHER) { 1093 for (int j = 0; j < klen; ++j) { 1094 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/ 1095 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict)) 1096 return 400; 1097 break; 1098 } 1099 } 1100 1101 /* remove leading whitespace from value */ 1102 while (*v == ' ' || *v == '\t') ++v; 1103 1104 for (; i+1 <= hoff[0]; ++i) { 1105 end = ptr + hoff[i+1]; 1106 if (end[0] != ' ' && end[0] != '\t') break; 1107 1108 /* line folding */ 1109 #ifdef __COVERITY__ 1110 force_assert(end - k >= 2); 1111 #endif 1112 if (end[-2] == '\r') 1113 end[-2] = ' '; 1114 else if (http_header_strict) 1115 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1116 end[-1] = ' '; 1117 } 1118 #ifdef __COVERITY__ 1119 /*(buf holding k has non-zero request-line, so end[-2] valid)*/ 1120 force_assert(end >= k + 2); 1121 #endif 1122 if (end[-2] == '\r') 1123 --end; 1124 else if (http_header_strict) 1125 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1126 /* remove trailing whitespace from value (+ remove '\r\n') */ 1127 /* (line k[-1] is always preceded by a '\n', 1128 * including first header after request-line, 1129 * so no need to check (end != k)) */ 1130 do { --end; } while (end[-1] == ' ' || end[-1] == '\t'); 1131 1132 const int vlen = (int)(end - v); 1133 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */ 1134 if (vlen <= 0) continue; /* ignore header */ 1135 1136 if (http_header_strict) { 1137 const char * const x = http_request_check_line_strict(v, vlen); 1138 if (x) 1139 return http_request_header_char_invalid(r, *x, 1140 "invalid character in header -> 400"); 1141 } /* else URI already checked in http_request_parse_reqline() for any '\0' */ 1142 1143 int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen); 1144 if (0 != status) return status; 1145 } 1146 1147 return 0; 1148 } 1149 1150 1151 static int 1152 http_request_parse (request_st * const restrict r, const int scheme_port) 1153 { 1154 int status = http_request_parse_target(r, scheme_port); 1155 if (0 != status) return status; 1156 1157 /* post-processing */ 1158 const unsigned int http_parseopts = r->conf.http_parseopts; 1159 1160 /* check hostname field if it is set */ 1161 /*(r->http_host might not be set until after parsing request headers)*/ 1162 if (__builtin_expect( (r->http_host != NULL), 1)) { 1163 if (0 != http_request_host_policy(r->http_host, 1164 http_parseopts, scheme_port)) 1165 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400"); 1166 buffer_copy_buffer(&r->uri.authority, r->http_host); 1167 } 1168 else { 1169 buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN("")); 1170 if (r->http_version >= HTTP_VERSION_1_1) 1171 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400"); 1172 } 1173 1174 if (HTTP_VERSION_1_1 != r->http_version 1175 && (r->rqst_htags 1176 & (light_bshift(HTTP_HEADER_UPGRADE) 1177 |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) { 1178 return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400"); 1179 } 1180 1181 if (0 == r->reqbody_length) { 1182 /* POST requires Content-Length (or Transfer-Encoding) 1183 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1184 if (HTTP_METHOD_POST == r->http_method 1185 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1186 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411"); 1187 } 1188 } 1189 else { 1190 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1191 if (-1 == r->reqbody_length 1192 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1193 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1194 * 3.3.3. Message Body Length 1195 * [...] 1196 * If a message is received with both a Transfer-Encoding and a 1197 * Content-Length header field, the Transfer-Encoding overrides the 1198 * Content-Length. Such a message might indicate an attempt to 1199 * perform request smuggling (Section 9.5) or response splitting 1200 * (Section 9.4) and ought to be handled as an error. A sender MUST 1201 * remove the received Content-Length field prior to forwarding such 1202 * a message downstream. 1203 */ 1204 const unsigned int http_header_strict = 1205 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1206 if (http_header_strict) { 1207 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400"); 1208 } 1209 else { 1210 /* ignore Content-Length */ 1211 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length")); 1212 } 1213 } 1214 if (http_method_get_or_head(r->http_method) 1215 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) { 1216 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400"); 1217 } 1218 } 1219 1220 return 0; 1221 } 1222 1223 1224 static int 1225 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1226 { 1227 /* 1228 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" 1229 * Header : "^([-a-zA-Z]+): (.+)$" 1230 * End : "^$" 1231 */ 1232 1233 int status; 1234 const unsigned int http_parseopts = r->conf.http_parseopts; 1235 1236 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts); 1237 if (0 != status) return status; 1238 1239 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts); 1240 if (0 != status) return status; 1241 1242 return http_request_parse(r, scheme_port); 1243 } 1244 1245 1246 static void 1247 http_request_headers_fin (request_st * const restrict r) 1248 { 1249 if (0 == r->http_status) { 1250 #if 0 1251 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET) 1252 | (1 << COMP_HTTP_SCHEME) 1253 | (1 << COMP_HTTP_HOST) 1254 | (1 << COMP_HTTP_REMOTE_IP) 1255 | (1 << COMP_HTTP_REQUEST_METHOD) 1256 | (1 << COMP_HTTP_URL) 1257 | (1 << COMP_HTTP_QUERY_STRING) 1258 | (1 << COMP_HTTP_REQUEST_HEADER); 1259 #else 1260 /* all config conditions are valid after parsing header 1261 * (set all bits; remove dependency on plugin_config.h) */ 1262 r->conditional_is_valid = ~0u; 1263 #endif 1264 } 1265 else { 1266 r->keep_alive = 0; 1267 r->reqbody_length = 0; 1268 } 1269 } 1270 1271 1272 void 1273 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1274 { 1275 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port); 1276 1277 http_request_headers_fin(r); 1278 1279 if (__builtin_expect( (0 != r->http_status), 0)) { 1280 if (r->conf.log_request_header_on_error) { 1281 /*(http_request_parse_headers() modifies hdrs only to 1282 * undo line-wrapping in-place using spaces)*/ 1283 log_error_multiline(r->conf.errh, __FILE__, __LINE__, 1284 hdrs, r->rqst_header_len, "rqst: "); 1285 } 1286 } 1287 } 1288 1289 1290 void 1291 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port) 1292 { 1293 if (0 == r->http_status) 1294 r->http_status = http_request_parse(r, scheme_port); 1295 1296 if (0 == r->http_status) { 1297 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION)) 1298 r->http_status = http_request_header_line_invalid(r, 400, 1299 "invalid Connection header with HTTP/2 -> 400"); 1300 } 1301 1302 http_request_headers_fin(r); 1303 1304 /* limited; headers not collected into a single buf for HTTP/2 */ 1305 if (__builtin_expect( (0 != r->http_status), 0)) { 1306 if (r->conf.log_request_header_on_error) { 1307 log_error(r->conf.errh, __FILE__, __LINE__, 1308 "request-header:\n:authority: %s\n:method: %s\n:path: %s", 1309 r->http_host ? r->http_host->ptr : "", 1310 http_method_buf(r->http_method)->ptr, 1311 !buffer_is_blank(&r->target) ? r->target.ptr : ""); 1312 } 1313 } 1314 1315 /* ignore Upgrade if using HTTP/2 */ 1316 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)) 1317 http_header_request_unset(r, HTTP_HEADER_UPGRADE, 1318 CONST_STR_LEN("upgrade")); 1319 /* XXX: should filter out other hop-by-hop connection headers, too */ 1320 } 1321