1 /* 2 * request - HTTP request processing 3 * 4 * Fully-rewritten from original 5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved 6 * License: BSD 3-clause (same as lighttpd) 7 */ 8 #include "first.h" 9 10 #include "request.h" 11 #include "burl.h" 12 #include "http_header.h" 13 #include "http_kv.h" 14 #include "log.h" 15 #include "sock_addr.h" 16 17 #include <limits.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 23 __attribute_cold__ 24 __attribute_noinline__ 25 void 26 http_request_state_append (buffer * const b, request_state_t state) 27 { 28 static const struct sn { const char *s; uint32_t n; } states[] = { 29 { CONST_STR_LEN("connect") } 30 ,{ CONST_STR_LEN("req-start") } 31 ,{ CONST_STR_LEN("read") } 32 ,{ CONST_STR_LEN("req-end") } 33 ,{ CONST_STR_LEN("readpost") } 34 ,{ CONST_STR_LEN("handle-req") } 35 ,{ CONST_STR_LEN("resp-start") } 36 ,{ CONST_STR_LEN("write") } 37 ,{ CONST_STR_LEN("resp-end") } 38 ,{ CONST_STR_LEN("error") } 39 ,{ CONST_STR_LEN("close") } 40 ,{ CONST_STR_LEN("(unknown)") } 41 }; 42 const struct sn * const p = 43 states +((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1); 44 buffer_append_string_len(b, p->s, p->n); 45 } 46 47 __attribute_cold__ 48 __attribute_noinline__ 49 __attribute_pure__ 50 const char * 51 http_request_state_short (request_state_t state) 52 { 53 /*((char *) returned, but caller must use only one char)*/ 54 static const char sstates[] = ".qrQRhsWSECx"; 55 return 56 sstates+((uint32_t)state <= CON_STATE_CLOSE ? state : CON_STATE_CLOSE+1); 57 } 58 59 60 __attribute_noinline__ 61 __attribute_nonnull__() 62 __attribute_pure__ 63 static const char * http_request_check_uri_strict (const uint8_t * const restrict s, const uint_fast32_t len) { 64 for (uint_fast32_t i = 0; i < len; ++i) { 65 if (__builtin_expect( (s[i] <= 32), 0)) return (const char *)s+i; 66 if (__builtin_expect( (s[i] == 127), 0)) return (const char *)s+i; 67 if (__builtin_expect( (s[i] == 255), 0)) return (const char *)s+i; 68 } 69 return NULL; 70 } 71 72 __attribute_nonnull__() 73 __attribute_pure__ 74 static const char * http_request_check_line_strict (const char * const restrict s, const uint_fast32_t len) { 75 for (uint_fast32_t i = 0; i < len; ++i) { 76 if (__builtin_expect( (((const uint8_t *)s)[i]<32), 0) && s[i] != '\t') 77 return s+i; 78 if (__builtin_expect( (s[i] == 127), 0)) 79 return s+i; 80 } 81 return NULL; 82 } 83 84 __attribute_nonnull__() 85 __attribute_pure__ 86 static const char * http_request_check_line_minimal (const char * const restrict s, const uint_fast32_t len) { 87 for (uint_fast32_t i = 0; i < len; ++i) { 88 if (__builtin_expect( (s[i] == '\0'), 0)) return s+i; 89 if (__builtin_expect( (s[i] == '\r'), 0)) return s+i; 90 if (__builtin_expect( (s[i] == '\n'), 0)) return s+i; 91 } 92 return NULL; 93 } 94 95 static int request_check_hostname(buffer * const host) { 96 /* 97 * hostport = host [ ":" port ] 98 * host = hostname | IPv4address | IPv6address 99 * hostname = *( domainlabel "." ) toplabel [ "." ] 100 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 101 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 102 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 103 * IPv6address = "[" ... "]" 104 * port = *digit 105 */ 106 107 const char *h = host->ptr; 108 109 if (*h != '[') { 110 uint32_t len = buffer_clen(host); 111 const char * const colon = memchr(h, ':', len); 112 uint32_t hlen = colon ? (uint32_t)(colon - h) : len; 113 114 /* if hostname ends in ".", strip it */ 115 if (__builtin_expect( (0 == hlen), 0)) return -1; 116 if (__builtin_expect( (h[hlen-1] == '.'), 0)) { 117 /* shift port info one left */ 118 if (--hlen == 0) return -1; 119 --len; 120 if (NULL != colon) 121 memmove(host->ptr+hlen, colon, len - hlen); 122 buffer_truncate(host, len); 123 } 124 125 int label_len = 0; 126 int allnumeric = 1; 127 int numeric = 1; 128 int level = 0; 129 for (uint32_t i = 0; i < hlen; ++i) { 130 const int ch = h[i]; 131 ++label_len; 132 if (light_isdigit(ch)) 133 continue; 134 else if ((light_isalpha(ch) || (ch == '-' && i != 0))) 135 numeric = 0; 136 else if (ch == '.' && 1 != label_len && '-' != h[i+1]) { 137 allnumeric &= numeric; 138 numeric = 1; 139 label_len = 0; 140 ++level; 141 } 142 else 143 return -1; 144 } 145 /* (if last segment numeric, then IPv4 and must have 4 numeric parts) */ 146 if (0 == label_len || (numeric && (level != 3 || !allnumeric))) 147 return -1; 148 149 h += hlen; 150 } 151 else { /* IPv6 address */ 152 /* check the address inside [...]; note: not fully validating */ 153 /* (note: not allowing scoped literals, e.g. %eth0 suffix) */ 154 ++h; /* step past '[' */ 155 int cnt = 0; 156 while (light_isxdigit(*h) || *h == '.' || (*h == ':' && ++cnt < 8)) ++h; 157 /*(invalid char, too many ':', missing ']', or empty "[]")*/ 158 if (*h != ']' || h - host->ptr == 1) return -1; 159 ++h; /* step past ']' */ 160 } 161 162 /* check numerical port, if present */ 163 if (*h == ':') { 164 if (__builtin_expect( (h[1] == '\0'), 0)) /*(remove trailing colon)*/ 165 buffer_truncate(host, h - host->ptr); 166 do { ++h; } while (light_isdigit(*h)); 167 } 168 169 return (*h == '\0') ? 0 : -1; 170 } 171 172 int http_request_host_normalize(buffer * const b, const int scheme_port) { 173 /* 174 * check for and canonicalize numeric IP address and portnum (optional) 175 * (IP address may be followed by ":portnum" (optional)) 176 * - IPv6: "[...]" 177 * - IPv4: "x.x.x.x" 178 * - IPv4: 12345678 (32-bit decimal number) 179 * - IPv4: 012345678 (32-bit octal number) 180 * - IPv4: 0x12345678 (32-bit hex number) 181 * 182 * allow any chars (except ':' and '\0' and stray '[' or ']') 183 * (other code may check chars more strictly or more pedantically) 184 * ':' delimits (optional) port at end of string 185 * "[]" wraps IPv6 address literal 186 * '\0' should have been rejected earlier were it present 187 * 188 * any chars includes, but is not limited to: 189 * - allow '-' any where, even at beginning of word 190 * (security caution: might be confused for cmd flag if passed to shell) 191 * - allow all-digit TLDs 192 * (might be mistaken for IPv4 addr by inet_aton() 193 * unless non-digits appear in subdomain) 194 */ 195 196 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 197 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. 198 * Not using inet_pton() (when available) on IPv4 for similar reasons. */ 199 200 const char * const p = b->ptr; 201 const size_t blen = buffer_clen(b); 202 long port = 0; 203 204 if (*p != '[') { 205 char * const colon = (char *)memchr(p, ':', blen); 206 if (colon) { 207 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ 208 if (colon[1] != '\0') { 209 char *e; 210 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ 211 if (0 < port && port <= USHRT_MAX && *e == '\0') { 212 /* valid port */ 213 } else { 214 return -1; 215 } 216 } /*(else ignore stray colon at string end)*/ 217 buffer_truncate(b, (size_t)(colon - p)); /*(remove port str)*/ 218 } 219 220 if (light_isdigit(*p)) do { 221 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ 222 /* (check one-element cache of normalized IPv4 address string) */ 223 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr; 224 size_t n = colon ? (size_t)(colon - p) : blen; 225 sock_addr addr; 226 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break; 227 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) { 228 sock_addr_inet_ntop_copy_buffer(b, &addr); 229 n = buffer_clen(b); 230 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n)); 231 } 232 } while (0); 233 } else do { /* IPv6 addr */ 234 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 235 236 /* (check one-element cache of normalized IPv4 address string) */ 237 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr; 238 sock_addr addr; 239 char *bracket = b->ptr+blen-1; 240 char *percent = strchr(b->ptr+1, '%'); 241 size_t len; 242 int rc; 243 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/ 244 if (blen <= 2) return -1; /*(invalid "[]")*/ 245 if (*bracket != ']') { 246 bracket = (char *)memchr(b->ptr+1, ']', blen-1); 247 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ 248 return -1; 249 } 250 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ 251 char *e; 252 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ 253 if (0 < port && port <= USHRT_MAX && *e == '\0') { 254 /* valid port */ 255 } else { 256 return -1; 257 } 258 } 259 } 260 261 len = (size_t)((percent ? percent : bracket) - (b->ptr+1)); 262 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) { 263 /* truncate after ']' and re-add normalized port, if needed */ 264 buffer_truncate(b, (size_t)(bracket - b->ptr + 1)); 265 break; 266 } 267 268 *bracket = '\0';/*(terminate IPv6 string)*/ 269 if (percent) *percent = '\0'; /*(remove %interface from address)*/ 270 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0); 271 if (percent) *percent = '%'; /*(restore %interface)*/ 272 *bracket = ']'; /*(restore bracket)*/ 273 if (1 != rc) return -1; 274 275 sock_addr_inet_ntop(&addr, buf, sizeof(buf)); 276 len = strlen(buf); 277 if (percent) { 278 if (percent > bracket) return -1; 279 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1; 280 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len)); 281 memcpy(buf+len, percent, (size_t)(bracket - percent)); 282 len += (size_t)(bracket - percent); 283 } 284 buffer_truncate(b, 1); /* truncate after '[' */ 285 buffer_append_str2(b, buf, len, CONST_STR_LEN("]")); 286 287 #else 288 289 return -1; 290 291 #endif 292 } while (0); 293 294 if (0 != port && port != scheme_port) { 295 buffer_append_char(b, ':'); 296 buffer_append_int(b, (int)port); 297 } 298 299 return 0; 300 } 301 302 int http_request_host_policy (buffer * const b, const unsigned int http_parseopts, const int scheme_port) { 303 /* caller should lowercase, as is done in http_request_header_set_Host(), 304 * for consistency in case the value is used prior to calling policy func */ 305 /*buffer_to_lower(b);*/ 306 return (((http_parseopts & HTTP_PARSEOPT_HOST_STRICT) 307 ? 0 != request_check_hostname(b) 308 : NULL != http_request_check_line_minimal(BUF_PTR_LEN(b))) 309 || ((http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) 310 && 0 != http_request_host_normalize(b, scheme_port))); 311 } 312 313 __attribute_cold__ 314 __attribute_noinline__ 315 static int http_request_header_line_invalid(request_st * const restrict r, const int status, const char * const restrict msg) { 316 if (r->conf.log_request_header_on_error) { 317 if (msg) log_error(r->conf.errh, __FILE__, __LINE__, "%s", msg); 318 } 319 return status; 320 } 321 322 __attribute_cold__ 323 __attribute_noinline__ 324 static int http_request_header_char_invalid(request_st * const restrict r, const char ch, const char * const restrict msg) { 325 if (r->conf.log_request_header_on_error) { 326 if ((unsigned char)ch > 32 && ch != 127) { 327 log_error(r->conf.errh, __FILE__, __LINE__, "%s ('%c')", msg, ch); 328 } 329 else { 330 log_error(r->conf.errh, __FILE__, __LINE__, "%s (0x%x)", msg, ch); 331 } 332 } 333 return 400; 334 } 335 336 337 __attribute_noinline__ 338 static void http_request_header_set_Host(request_st * const restrict r, const char * const h, size_t hlen) 339 { 340 r->http_host = http_header_request_set_ptr(r, HTTP_HEADER_HOST, 341 CONST_STR_LEN("Host")); 342 buffer_copy_string_len_lc(r->http_host, h, hlen); 343 } 344 345 346 int64_t 347 li_restricted_strtoint64 (const char *v, const uint32_t vlen, const char ** const err) 348 { 349 /* base 10 strtoll() parsing exactly vlen chars and requiring digits 0-9 */ 350 /* rejects negative numbers and considers values > INT64_MAX an error */ 351 /* note: errno is not set; detect error if *err != v+vlen upon return */ 352 /*(caller must check 0 == vlen if that is to be an error for caller)*/ 353 int64_t rv = 0; 354 uint32_t i; 355 for (i = 0; i < vlen; ++i) { 356 const uint8_t c = ((uint8_t *)v)[i] - '0'; /*(unsigned; underflow ok)*/ 357 if (c > 9) break; 358 if (rv > INT64_MAX/10) break; 359 rv *= 10; 360 if (rv > INT64_MAX - c) break; 361 rv += c; 362 } 363 *err = v+i; 364 return rv; 365 } 366 367 368 __attribute_cold__ 369 static int http_request_parse_duplicate(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 370 /* Proxies sometimes send dup headers 371 * if they are the same we ignore the second 372 * if not, we raise an error */ 373 const buffer * const vb = http_header_request_get(r, id, k, klen); 374 if (vb && buffer_eq_icase_slen(vb, v, vlen)) 375 return 0; /* ignore header; matches existing header */ 376 377 const char *errmsg; 378 switch (id) { 379 case HTTP_HEADER_HOST: 380 errmsg = "duplicate Host header -> 400"; 381 break; 382 case HTTP_HEADER_CONTENT_TYPE: 383 errmsg = "duplicate Content-Type header -> 400"; 384 break; 385 case HTTP_HEADER_IF_MODIFIED_SINCE: 386 errmsg = "duplicate If-Modified-Since header -> 400"; 387 break; 388 case HTTP_HEADER_HTTP2_SETTINGS: 389 errmsg = "duplicate HTTP2-Settings header -> 400"; 390 break; 391 default: 392 errmsg = "duplicate header -> 400"; 393 break; 394 case HTTP_HEADER_IF_NONE_MATCH: 395 /* if dup, only the first one will survive */ 396 return 0; /* ignore header */ 397 } 398 return http_request_header_line_invalid(r, 400, errmsg); 399 } 400 401 402 /* add header to list of headers 403 * certain headers are also parsed 404 * might drop a header if deemed unnecessary/broken 405 * 406 * returns 0 on success, HTTP status on error 407 */ 408 static int http_request_parse_single_header(request_st * const restrict r, const enum http_header_e id, const char * const restrict k, const size_t klen, const char * const restrict v, const size_t vlen) { 409 /* 410 * Note: k might not be '\0'-terminated 411 * Note: v is not '\0'-terminated 412 * With lighttpd HTTP/1.1 parser, v ends with whitespace 413 * (one of '\r' '\n' ' ' '\t') 414 * With lighttpd HTTP/2 parser, v should not be accessed beyond vlen 415 * (care must be taken to avoid libc funcs which expect z-strings) 416 */ 417 /*assert(vlen);*//*(caller must not call this func with 0 klen or 0 vlen)*/ 418 419 switch (id) { 420 /*case HTTP_HEADER_OTHER:*/ 421 default: 422 break; 423 case HTTP_HEADER_HOST: 424 if (!light_btst(r->rqst_htags, HTTP_HEADER_HOST)) { 425 if (vlen >= 1024) { /*(expecting < 256)*/ 426 return http_request_header_line_invalid(r, 400, "uri-authority too long -> 400"); 427 } 428 /*(http_request_header_append() plus sets r->http_host)*/ 429 http_request_header_set_Host(r, v, vlen); 430 return 0; 431 } 432 else if (NULL != r->http_host 433 && __builtin_expect( buffer_eq_slen(r->http_host,v,vlen), 1)) { 434 /* ignore all Host: headers if match authority in request line */ 435 /* (expect Host to match case in :authority of HTTP/2 request) */ 436 return 0; /* ignore header */ 437 } 438 /* else parse duplicate for match or error */ 439 __attribute_fallthrough__ 440 case HTTP_HEADER_IF_MODIFIED_SINCE: 441 case HTTP_HEADER_IF_NONE_MATCH: 442 case HTTP_HEADER_CONTENT_TYPE: 443 case HTTP_HEADER_HTTP2_SETTINGS: 444 if (light_btst(r->rqst_htags, id)) 445 return http_request_parse_duplicate(r, id, k, klen, v, vlen); 446 break; 447 case HTTP_HEADER_CONNECTION: 448 /* "Connection: close" is common case if header is present */ 449 if ((vlen == 5 && buffer_eq_icase_ssn(v, CONST_STR_LEN("close"))) 450 || http_header_str_contains_token(v,vlen,CONST_STR_LEN("close"))) { 451 r->keep_alive = 0; 452 break; 453 } 454 if (http_header_str_contains_token(v,vlen,CONST_STR_LEN("keep-alive"))){ 455 r->keep_alive = 1; 456 break; 457 } 458 break; 459 case HTTP_HEADER_CONTENT_LENGTH: 460 if (!light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 461 /*(trailing whitespace was removed from vlen)*/ 462 /*(not using strtoll() since v might not be z-string)*/ 463 const char *err; 464 off_t clen = (off_t)li_restricted_strtoint64(v, vlen, &err); 465 if (err == v+vlen) { 466 /* (set only if not set to -1 by Transfer-Encoding: chunked) */ 467 if (0 == r->reqbody_length) r->reqbody_length = clen; 468 } 469 else { 470 return http_request_header_line_invalid(r, 400, "invalid Content-Length header -> 400"); 471 } 472 } 473 else { 474 return http_request_header_line_invalid(r, 400, "duplicate Content-Length header -> 400"); 475 } 476 break; 477 case HTTP_HEADER_TRANSFER_ENCODING: 478 if (HTTP_VERSION_1_1 != r->http_version) { 479 return http_request_header_line_invalid(r, 400, 480 HTTP_VERSION_1_0 == r->http_version 481 ? "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400" 482 : "HTTP/2 with Transfer-Encoding is invalid -> 400"); 483 } 484 485 if (!buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("chunked"))) { 486 /* Transfer-Encoding might contain additional encodings, 487 * which are not currently supported by lighttpd */ 488 return http_request_header_line_invalid(r, 501, NULL); /* Not Implemented */ 489 } 490 r->reqbody_length = -1; 491 492 /* Transfer-Encoding is a hop-by-hop header, 493 * which must not be blindly forwarded to backends */ 494 return 0; /* skip header */ 495 } 496 497 http_header_request_append(r, id, k, klen, v, vlen); 498 return 0; 499 } 500 501 __attribute_cold__ 502 static int http_request_parse_proto_loose(request_st * const restrict r, const char * const restrict ptr, const size_t len, const unsigned int http_parseopts) { 503 const char * proto = memchr(ptr, ' ', len); 504 if (NULL == proto) 505 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 506 proto = memchr(proto+1, ' ', len - (proto+1 - ptr)); 507 if (NULL == proto) 508 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 509 ++proto; 510 511 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') { 512 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) { 513 /* length already checked before calling this routine */ 514 /* (len != (size_t)(proto - ptr + 8)) */ 515 if (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(http_header_strict)*/ 516 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 517 r->http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0; 518 } 519 else 520 return http_request_header_line_invalid(r, 505, "unknown HTTP version -> 505"); 521 } 522 else 523 return http_request_header_line_invalid(r, 400, "unknown protocol -> 400"); 524 525 /* keep-alive default: HTTP/1.1 -> true; HTTP/1.0 -> false */ 526 r->keep_alive = (HTTP_VERSION_1_0 != r->http_version); 527 528 return 0; 529 } 530 531 __attribute_cold__ 532 static const char * http_request_parse_reqline_uri(request_st * const restrict r, const char * const restrict uri, const size_t len, const unsigned int http_parseopts) { 533 const char *nuri; 534 if ((len > 7 && buffer_eq_icase_ssn(uri, "http://", 7) 535 && NULL != (nuri = memchr(uri + 7, '/', len-7))) 536 || 537 (len > 8 && buffer_eq_icase_ssn(uri, "https://", 8) 538 && NULL != (nuri = memchr(uri + 8, '/', len-8)))) { 539 const char * const host = uri + (uri[4] == ':' ? 7 : 8); 540 const size_t hostlen = nuri - host; 541 if (0 == hostlen || hostlen >= 1024) { /*(expecting < 256)*/ 542 http_request_header_line_invalid(r, 400, "uri-authority empty or too long -> 400"); 543 return NULL; 544 } 545 /* Insert as "Host" header */ 546 http_request_header_set_Host(r, host, hostlen); 547 return nuri; 548 } else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) /*(!http_header_strict)*/ 549 || (HTTP_METHOD_CONNECT == r->http_method && (uri[0] == ':' || light_isdigit(uri[0]))) 550 || (HTTP_METHOD_OPTIONS == r->http_method && uri[0] == '*' && 1 == len)) { 551 /* (permitted) */ 552 return uri; 553 } else { 554 http_request_header_line_invalid(r, 400, "request-URI parse error -> 400"); 555 return NULL; 556 } 557 } 558 559 560 __attribute_cold__ 561 __attribute_noinline__ 562 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict); 563 564 565 int 566 http_request_validate_pseudohdrs (request_st * const restrict r, const int scheme, const unsigned int http_parseopts) 567 { 568 /* :method is required to indicate method 569 * CONNECT method must have :method and :authority 570 * unless RFC8441 CONNECT extension, which must follow 'other' (below) 571 * All other methods must have at least :method :scheme :path */ 572 573 if (HTTP_METHOD_UNSET == r->http_method) 574 return http_request_header_line_invalid(r, 400, 575 "missing pseudo-header method -> 400"); 576 577 if (HTTP_METHOD_CONNECT != r->http_method) 578 r->h2_connect_ext = 0; 579 580 if (__builtin_expect( (HTTP_METHOD_CONNECT != r->http_method), 1) 581 || __builtin_expect( (r->h2_connect_ext != 0), 0)) { 582 583 if (!scheme) 584 return http_request_header_line_invalid(r, 400, 585 "missing pseudo-header scheme -> 400"); 586 587 if (buffer_is_blank(&r->target)) 588 return http_request_header_line_invalid(r, 400, 589 "missing pseudo-header path -> 400"); 590 591 const char * const uri = r->target.ptr; 592 if (*uri != '/') { /* (common case: (*uri == '/')) */ 593 if (uri[0] != '*' || uri[1] != '\0' 594 || HTTP_METHOD_OPTIONS != r->http_method) 595 return http_request_header_line_invalid(r, 400, 596 "invalid pseudo-header path -> 400"); 597 } 598 } 599 else { /* HTTP_METHOD_CONNECT */ 600 if (NULL == r->http_host) 601 return http_request_header_line_invalid(r, 400, 602 "missing pseudo-header authority -> 400"); 603 if (!buffer_is_blank(&r->target) || scheme) 604 return http_request_header_line_invalid(r, 400, 605 "invalid pseudo-header with CONNECT -> 400"); 606 /* note: this copy occurs prior to http_request_host_policy() 607 * so any consumer handling CONNECT should normalize r->target 608 * as appropriate */ 609 buffer_copy_buffer(&r->target, r->http_host); 610 } 611 buffer_copy_buffer(&r->target_orig, &r->target); 612 613 /* r->http_host, if set, is checked with http_request_host_policy() 614 * in http_request_parse() */ 615 616 /* copied and modified from end of http_request_parse_reqline() */ 617 618 /* check uri for invalid characters */ 619 const uint32_t len = buffer_clen(&r->target);/*(http_header_strict)*/ 620 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 621 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 622 ? NULL /* URI will be checked in http_request_parse_target() */ 623 : http_request_check_uri_strict((const uint8_t *)r->target.ptr, len) 624 : http_request_check_line_minimal(r->target.ptr, len); 625 return (NULL == x) 626 ? 0 627 : http_request_header_char_invalid(r, *x, 628 "invalid character in URI -> 400"); 629 } 630 631 632 int 633 http_request_parse_header (request_st * const restrict r, http_header_parse_ctx * const restrict hpctx) 634 { 635 /* Note: k and v might not be '\0' terminated strings; 636 * care must be taken to avoid libc funcs which expect z-strings */ 637 const char * const restrict k = hpctx->k; 638 const char * restrict v = hpctx->v; 639 const uint32_t klen = hpctx->klen; 640 uint32_t vlen = hpctx->vlen; 641 642 if (0 == klen) 643 return http_request_header_line_invalid(r, 400, 644 "invalid header key -> 400"); 645 646 if ((hpctx->hlen += klen + vlen + 4) > hpctx->max_request_field_size) { 647 /*(configurable with server.max-request-field-size; default 8k)*/ 648 #if 1 /* emit to error log for people sending large headers */ 649 log_error(r->conf.errh, __FILE__, __LINE__, 650 "oversized request header -> 431"); 651 return 431; /* Request Header Fields Too Large */ 652 #else 653 /* 431 Request Header Fields Too Large */ 654 return http_request_header_line_invalid(r, 431, 655 "oversized request header -> 431"); 656 #endif 657 } 658 659 if (!hpctx->trailers) { 660 if (*k == ':') { 661 /* HTTP/2 request pseudo-header fields */ 662 if (!hpctx->pseudo) /*(pseudo header after non-pseudo header)*/ 663 return http_request_header_line_invalid(r, 400, 664 "invalid pseudo-header -> 400"); 665 if (0 == vlen) 666 return http_request_header_line_invalid(r, 400, 667 "invalid header value -> 400"); 668 669 /* (note: relies on implementation details using ls-hpack in h2.c) 670 * (hpctx->id mapped from lsxpack_header_t hpack_index, which only 671 * matches key, not also value, if lsxpack_header_t flags does not 672 * have LSXPACK_HPACK_VAL_MATCHED set, so HTTP_HEADER_H2_METHOD_GET 673 * below indicates any method, not only "GET") */ 674 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 675 switch (klen-1) { 676 case 4: 677 if (0 == memcmp(k+1, "path", 4)) 678 hpctx->id = HTTP_HEADER_H2_PATH; 679 break; 680 case 6: 681 if (0 == memcmp(k+1, "method", 6)) 682 hpctx->id = HTTP_HEADER_H2_METHOD_GET; 683 else if (0 == memcmp(k+1, "scheme", 6)) 684 hpctx->id = HTTP_HEADER_H2_SCHEME_HTTP; 685 break; 686 case 8: 687 if (0 == memcmp(k+1, "protocol", 8)) 688 hpctx->id = HTTP_HEADER_H2_PROTOCOL; 689 break; 690 case 9: 691 if (0 == memcmp(k+1, "authority", 9)) 692 hpctx->id = HTTP_HEADER_H2_AUTHORITY; 693 break; 694 default: 695 break; 696 } 697 if (hpctx->id >= HTTP_HEADER_H2_UNKNOWN) 698 return http_request_header_line_invalid(r, 400, 699 "invalid pseudo-header -> 400"); 700 } 701 702 switch (hpctx->id) { 703 case HTTP_HEADER_H2_AUTHORITY: 704 if (__builtin_expect( (r->http_host != NULL), 0)) 705 break; 706 if (vlen >= 1024) /*(expecting < 256)*/ 707 return http_request_header_line_invalid(r, 400, 708 "invalid pseudo-header authority too long -> 400"); 709 /* insert as "Host" header */ 710 http_request_header_set_Host(r, v, vlen); 711 return 0; 712 case HTTP_HEADER_H2_METHOD_GET: /*(any method, not only "GET")*/ 713 case HTTP_HEADER_H2_METHOD_POST: 714 if (__builtin_expect( (HTTP_METHOD_UNSET != r->http_method), 0)) 715 break; 716 r->http_method = get_http_method_key(v, vlen); 717 if (HTTP_METHOD_UNSET >= r->http_method) 718 return http_request_header_line_invalid(r, 501, 719 "unknown http-method -> 501"); 720 return 0; 721 case HTTP_HEADER_H2_PATH: /*(any path, not only "/")*/ 722 case HTTP_HEADER_H2_PATH_INDEX_HTML: 723 if (__builtin_expect( (!buffer_is_blank(&r->target)), 0)) 724 break; 725 buffer_copy_string_len(&r->target, v, vlen); 726 return 0; 727 case HTTP_HEADER_H2_SCHEME_HTTP: /*(any scheme, not only "http")*/ 728 case HTTP_HEADER_H2_SCHEME_HTTPS: 729 if (__builtin_expect( (hpctx->scheme), 0)) 730 break; 731 hpctx->scheme = 1; /*(marked present, but otherwise ignored)*/ 732 return 0; 733 #if 0 734 switch (vlen) {/*(validated, but then ignored)*/ 735 case 5: /* "https" */ 736 if (v[4]!='s') break; 737 __attribute_fallthrough__ 738 case 4: /* "http" */ 739 if (v[0]=='h' && v[1]=='t' && v[2]=='t' && v[3]=='p') { 740 hpctx->scheme = 1; 741 return 0; 742 } 743 break; 744 default: 745 break; 746 } 747 return http_request_header_line_invalid(r, 400, 748 "unknown pseudo-header scheme -> 400"); 749 #endif 750 case HTTP_HEADER_H2_PROTOCOL: 751 /* support only ":protocol: websocket" for now */ 752 if (vlen != 9 || 0 != memcmp(v, "websocket", 9)) 753 return http_request_header_line_invalid(r, 405, 754 "unhandled :protocol value -> 405"); 755 /*(future: might be enum of recognized :protocol: ext values)*/ 756 r->h2_connect_ext = 1; 757 return 0; 758 default: 759 return http_request_header_line_invalid(r, 400, 760 "invalid pseudo-header -> 400"); 761 } 762 return http_request_header_line_invalid(r, 400, 763 "repeated pseudo-header -> 400"); 764 } 765 else { /*(non-pseudo headers)*/ 766 if (hpctx->pseudo) { /*(transition to non-pseudo headers)*/ 767 hpctx->pseudo = 0; 768 int status = 769 http_request_validate_pseudohdrs(r, hpctx->scheme, 770 hpctx->http_parseopts); 771 if (0 != status) return status; 772 } 773 if (0 == vlen) 774 return 0; 775 776 const unsigned int http_header_strict = 777 (hpctx->http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 778 779 const char * const x = (http_header_strict) 780 ? http_request_check_line_strict(v, vlen) 781 : http_request_check_line_minimal(v, vlen); 782 if (x) 783 return http_request_header_char_invalid(r, *x, 784 "invalid character in header -> 400"); 785 786 /* remove leading and trailing whitespace (strict RFC conformance)*/ 787 if (__builtin_expect( (*v <= 0x20), 0)) { 788 while ((*v == ' ' || *v == '\t') && (++v, --vlen)) ; 789 if (0 == vlen) 790 return 0; 791 } 792 if (__builtin_expect( (v[vlen-1] <= 0x20), 0)) { 793 while (v[vlen-1] == ' ' || v[vlen-1] == '\t') --vlen; 794 } 795 796 if (__builtin_expect( (hpctx->id == HTTP_HEADER_H2_UNKNOWN), 0)) { 797 uint32_t j = 0; 798 while (j < klen && (light_islower(k[j]) || k[j] == '-')) 799 ++j; 800 801 if (__builtin_expect( (j != klen), 0)) { 802 if (light_isupper(k[j])) 803 return 400; 804 if (0 != http_request_parse_header_other(r, k+j, klen-j, 805 http_header_strict)) 806 return 400; 807 } 808 809 hpctx->id = http_header_hkey_get_lc(k, klen); 810 } 811 812 const enum http_header_e id = (enum http_header_e)hpctx->id; 813 814 if (__builtin_expect( (id == HTTP_HEADER_TE), 0) 815 && !buffer_eq_icase_ss(v, vlen, CONST_STR_LEN("trailers"))) 816 return http_request_header_line_invalid(r, 400, 817 "invalid TE header value with HTTP/2 -> 400"); 818 819 return http_request_parse_single_header(r, id, k, klen, v, vlen); 820 } 821 } 822 else { /*(trailers)*/ 823 if (*k == ':') 824 return http_request_header_line_invalid(r, 400, 825 "invalid pseudo-header in trailers -> 400"); 826 /* ignore trailers (after required HPACK decoding) if streaming 827 * request body to backend since headers have already been sent 828 * to backend via Common Gateway Interface (CGI) (CGI, FastCGI, 829 * SCGI, etc) or HTTP/1.1 (proxy) (mod_proxy does not currently 830 * support using HTTP/2 to connect to backends) */ 831 #if 0 /* (if needed, save flag in hpctx instead of fdevent.h dependency)*/ 832 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST) 833 return 0; 834 #endif 835 /* Note: do not unconditionally merge into headers since if 836 * headers had already been sent to backend, then mod_accesslog 837 * logging of request headers might be inaccurate. 838 * Many simple backends do not support HTTP/1.1 requests sending 839 * Transfer-Encoding: chunked, and even those that do might not 840 * handle trailers. Some backends do not even support HTTP/1.1. 841 * For all these reasons, ignore trailers if streaming request 842 * body to backend. Revisit in future if adding support for 843 * connecting to backends using HTTP/2 (with explicit config 844 * option to force connecting to backends using HTTP/2) */ 845 846 /* XXX: TODO: request trailers not handled if streaming reqbody 847 * XXX: must ensure that trailers are not disallowed field-names 848 */ 849 850 #if 0 851 if (0 == vlen) 852 return 0; 853 #endif 854 855 return 0; 856 } 857 } 858 859 860 static int http_request_parse_reqline(request_st * const restrict r, const char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 861 size_t len = hoff[2]; 862 863 /* parse the first line of the request 864 * <method> <uri> <protocol>\r\n 865 * */ 866 if (len < 13) /* minimum len with (!http_header_strict): "x x HTTP/1.0\n" */ 867 return http_request_header_line_invalid(r, 400, "invalid request line (too short) -> 400"); 868 if (ptr[len-2] == '\r') 869 len-=2; 870 else if (!(http_parseopts & HTTP_PARSEOPT_HEADER_STRICT)) /*(!http_header_strict)*/ 871 len-=1; 872 else 873 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 874 875 /* 876 * RFC7230: 877 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT 878 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive 879 */ 880 881 /* protocol is expected to be " HTTP/1.1" or " HTTP/1.0" at end of line */ 882 union proto_un { 883 char c[8]; 884 uint64_t u; 885 }; 886 static const union proto_un http_1_1 = {{'H','T','T','P','/','1','.','1'}}; 887 static const union proto_un http_1_0 = {{'H','T','T','P','/','1','.','0'}}; 888 const char *p = ptr + len - 8; 889 union proto_un proto8; 890 proto8.c[0]=p[0]; proto8.c[1]=p[1]; proto8.c[2]=p[2]; proto8.c[3]=p[3]; 891 proto8.c[4]=p[4]; proto8.c[5]=p[5]; proto8.c[6]=p[6]; proto8.c[7]=p[7]; 892 if (p[-1] == ' ' && http_1_1.u == proto8.u) { 893 r->http_version = HTTP_VERSION_1_1; 894 r->keep_alive = 1; /* keep-alive default: HTTP/1.1 -> true */ 895 } 896 else if (p[-1] == ' ' && http_1_0.u == proto8.u) { 897 r->http_version = HTTP_VERSION_1_0; 898 r->keep_alive = 0; /* keep-alive default: HTTP/1.0 -> false */ 899 } 900 else { 901 int status = http_request_parse_proto_loose(r,ptr,len,http_parseopts); 902 if (0 != status) return status; 903 /*(space char must exist if http_request_parse_proto_loose() succeeds)*/ 904 for (p = ptr + len - 9; p[-1] != ' '; --p) ; 905 } 906 907 /* method is expected to be a short string in the general case */ 908 size_t i = 0; 909 while (ptr[i] != ' ') ++i; 910 #if 0 /*(space must exist if protocol was parsed successfully)*/ 911 while (i < len && ptr[i] != ' ') ++i; 912 if (ptr[i] != ' ') 913 return http_request_header_line_invalid(r, 400, "incomplete request line -> 400"); 914 #endif 915 916 r->http_method = get_http_method_key(ptr, i); 917 if (HTTP_METHOD_UNSET >= r->http_method) 918 return http_request_header_line_invalid(r, 501, "unknown http-method -> 501"); 919 920 const char *uri = ptr + i + 1; 921 922 if (uri == p) 923 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 924 len = (size_t)(p - uri - 1); 925 926 if (*uri != '/') { /* (common case: (*uri == '/')) */ 927 uri = http_request_parse_reqline_uri(r, uri, len, http_parseopts); 928 if (NULL == uri) return 400; 929 len = (size_t)(p - uri - 1); 930 } 931 932 if (0 == len) 933 return http_request_header_line_invalid(r, 400, "no uri specified -> 400"); 934 935 /* check uri for invalid characters */ /* http_header_strict */ 936 const char * const x = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT) 937 ? (http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) 938 ? NULL /* URI will be checked in http_request_parse_target() */ 939 : http_request_check_uri_strict((const uint8_t *)uri, len) 940 : memchr(ptr, '\0', hoff[hoff[0]]);/* check entire headers set for '\0' */ 941 if (x) 942 http_request_header_char_invalid(r, *x, "invalid character in URI -> 400"); 943 944 buffer_copy_string_len(&r->target, uri, len); 945 buffer_copy_string_len(&r->target_orig, uri, len); 946 return 0; 947 } 948 949 int http_request_parse_target(request_st * const r, int scheme_port) { 950 /* URI is parsed into components at start of request and may 951 * also be re-parsed upon HANDLER_COMEBACK during the request 952 * r->target is expected to be a "/url-part?query-part" 953 * (and *not* a fully-qualified URI starting https://...) 954 * r->uri.authority is expected to be parsed elsewhere into r->http_host 955 */ 956 957 /** 958 * prepare strings 959 * 960 * - uri.path 961 * - uri.query 962 * 963 */ 964 965 /** 966 * Name according to RFC 2396 967 * 968 * - scheme 969 * - authority 970 * - path 971 * - query 972 * 973 * (scheme)://(authority)(path)?(query)#fragment 974 * 975 */ 976 977 /* take initial scheme value from connection-level state 978 * (request r->uri.scheme can be overwritten for later, 979 * for example by mod_extforward or mod_magnet) */ 980 buffer_copy_string_len(&r->uri.scheme, "https", scheme_port == 443 ? 5 : 4); 981 982 buffer * const target = &r->target; 983 if ((r->http_method == HTTP_METHOD_CONNECT && !r->h2_connect_ext) 984 || (r->http_method == HTTP_METHOD_OPTIONS 985 && target->ptr[0] == '*' 986 && target->ptr[1] == '\0')) { 987 /* CONNECT ... (or) OPTIONS * ... */ 988 buffer_copy_buffer(&r->uri.path, target); 989 buffer_clear(&r->uri.query); 990 return 0; 991 } 992 993 char *qstr; 994 if (r->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { 995 /*uint32_t len = buffer_clen(target);*/ 996 int qs = burl_normalize(target, r->tmp_buf, r->conf.http_parseopts); 997 if (-2 == qs) 998 return http_request_header_line_invalid(r, 400, 999 "invalid character in URI -> 400"); /* Bad Request */ 1000 qstr = (-1 == qs) ? NULL : target->ptr+qs; 1001 #if 0 /* future: might enable here, or below for all requests */ 1002 /* (Note: total header size not recalculated on HANDLER_COMEBACK 1003 * even if other request headers changed during processing) 1004 * (If (0 != r->loops_per_request), then the generated 1005 * request is too large. Should a different error be returned?) */ 1006 r->rqst_header_len -= len; 1007 len = buffer_clen(target); 1008 r->rqst_header_len += len; 1009 if (len > MAX_HTTP_REQUEST_URI) { 1010 return 414; /* 414 URI Too Long */ 1011 } 1012 if (r->rqst_header_len > MAX_HTTP_REQUEST_HEADER) { 1013 log_error(r->conf.errh, __FILE__, __LINE__, 1014 "request header fields too large: %u -> 431", 1015 r->rqst_header_len); 1016 return 431; /* Request Header Fields Too Large */ 1017 } 1018 #endif 1019 } 1020 else { 1021 size_t rlen = buffer_clen(target); 1022 qstr = memchr(target->ptr, '#', rlen);/* discard fragment */ 1023 if (qstr) { 1024 rlen = (size_t)(qstr - target->ptr); 1025 buffer_truncate(target, rlen); 1026 } 1027 qstr = memchr(target->ptr, '?', rlen); 1028 } 1029 1030 /** extract query string from target */ 1031 const char * const pstr = target->ptr; 1032 const uint32_t rlen = buffer_clen(target); 1033 uint32_t plen; 1034 if (NULL != qstr) { 1035 plen = (uint32_t)(qstr - pstr); 1036 buffer_copy_string_len(&r->uri.query, qstr + 1, rlen - plen - 1); 1037 } 1038 else { 1039 plen = rlen; 1040 buffer_clear(&r->uri.query); 1041 } 1042 buffer_copy_string_len(&r->uri.path, pstr, plen); 1043 1044 /* decode url to path 1045 * 1046 * - decode url-encodings (e.g. %20 -> ' ') 1047 * - remove path-modifiers (e.g. /../) 1048 */ 1049 1050 buffer_urldecode_path(&r->uri.path); 1051 buffer_path_simplify(&r->uri.path); 1052 if (r->uri.path.ptr[0] != '/') 1053 return http_request_header_line_invalid(r, 400, 1054 "uri-path does not begin with '/' -> 400"); /* Bad Request */ 1055 1056 return 0; 1057 } 1058 1059 __attribute_cold__ 1060 __attribute_noinline__ 1061 static int http_request_parse_header_other(request_st * const restrict r, const char * const restrict k, const int klen, const unsigned int http_header_strict) { 1062 for (int i = 0; i < klen; ++i) { 1063 if (light_isalpha(k[i]) || k[i] == '-') continue; /*(common cases)*/ 1064 /** 1065 * 1*<any CHAR except CTLs or separators> 1066 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127) 1067 * 1068 */ 1069 switch(k[i]) { 1070 case ' ': 1071 case '\t': 1072 return http_request_header_line_invalid(r, 400, "WS character in key -> 400"); 1073 case '\r': 1074 case '\n': 1075 case '(': 1076 case ')': 1077 case '<': 1078 case '>': 1079 case '@': 1080 case ',': 1081 case ':': 1082 case ';': 1083 case '\\': 1084 case '\"': 1085 case '/': 1086 case '[': 1087 case ']': 1088 case '?': 1089 case '=': 1090 case '{': 1091 case '}': 1092 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1093 default: 1094 if (http_header_strict ? (k[i] < 32 || ((unsigned char *)k)[i] >= 127) : k[i] == '\0') 1095 return http_request_header_char_invalid(r, k[i], "invalid character in header key -> 400"); 1096 break; /* ok */ 1097 } 1098 } 1099 return 0; 1100 } 1101 1102 static int http_request_parse_headers(request_st * const restrict r, char * const restrict ptr, const unsigned short * const restrict hoff, const unsigned int http_parseopts) { 1103 const unsigned int http_header_strict = (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1104 1105 #if 0 /*(not checked here; will later result in invalid label for HTTP header)*/ 1106 int i = hoff[2]; 1107 1108 if (ptr[i] == ' ' || ptr[i] == '\t') { 1109 return http_request_header_line_invalid(r, 400, "WS at the start of first line -> 400"); 1110 } 1111 #endif 1112 1113 for (int i = 2; i < hoff[0]; ++i) { 1114 const char *k = ptr + hoff[i]; 1115 /* one past last line hoff[hoff[0]] is to final "\r\n" */ 1116 char *end = ptr + hoff[i+1]; 1117 1118 const char *colon = memchr(k, ':', end - k); 1119 if (NULL == colon) 1120 return http_request_header_line_invalid(r, 400, "invalid header missing ':' -> 400"); 1121 1122 const char *v = colon + 1; 1123 1124 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1125 * 3.2.4. Field Parsing 1126 * [...] 1127 * No whitespace is allowed between the header field-name and colon. In 1128 * the past, differences in the handling of such whitespace have led to 1129 * security vulnerabilities in request routing and response handling. A 1130 * server MUST reject any received request message that contains 1131 * whitespace between a header field-name and colon with a response code 1132 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a 1133 * response message before forwarding the message downstream. 1134 */ 1135 /* (line k[-1] is always preceded by a '\n', 1136 * including first header after request-line, 1137 * so no need to check colon != k) */ 1138 if (colon[-1] == ' ' || colon[-1] == '\t') { 1139 if (http_header_strict) { 1140 return http_request_header_line_invalid(r, 400, "invalid whitespace between field-name and colon -> 400"); 1141 } 1142 else { 1143 /* remove trailing whitespace from key(if !http_header_strict)*/ 1144 do { --colon; } while (colon[-1] == ' ' || colon[-1] == '\t'); 1145 } 1146 } 1147 1148 const int klen = (int)(colon - k); 1149 if (0 == klen) 1150 return http_request_header_line_invalid(r, 400, "invalid header key -> 400"); 1151 const enum http_header_e id = http_header_hkey_get(k, klen); 1152 1153 if (id == HTTP_HEADER_OTHER) { 1154 for (int j = 0; j < klen; ++j) { 1155 if (light_isalpha(k[j]) || k[j] == '-') continue; /*(common cases)*/ 1156 if (0 != http_request_parse_header_other(r, k+j, klen-j, http_header_strict)) 1157 return 400; 1158 break; 1159 } 1160 } 1161 1162 /* remove leading whitespace from value */ 1163 while (*v == ' ' || *v == '\t') ++v; 1164 1165 for (; i+1 <= hoff[0]; ++i) { 1166 end = ptr + hoff[i+1]; 1167 if (end[0] != ' ' && end[0] != '\t') break; 1168 1169 /* line folding */ 1170 #ifdef __COVERITY__ 1171 force_assert(end - k >= 2); 1172 #endif 1173 if (end[-2] == '\r') 1174 end[-2] = ' '; 1175 else if (http_header_strict) 1176 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1177 end[-1] = ' '; 1178 } 1179 #ifdef __COVERITY__ 1180 /*(buf holding k has non-zero request-line, so end[-2] valid)*/ 1181 force_assert(end >= k + 2); 1182 #endif 1183 if (end[-2] == '\r') 1184 --end; 1185 else if (http_header_strict) 1186 return http_request_header_line_invalid(r, 400, "missing CR before LF in header -> 400"); 1187 /* remove trailing whitespace from value (+ remove '\r\n') */ 1188 /* (line k[-1] is always preceded by a '\n', 1189 * including first header after request-line, 1190 * so no need to check (end != k)) */ 1191 do { --end; } while (end[-1] == ' ' || end[-1] == '\t'); 1192 1193 const int vlen = (int)(end - v); 1194 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */ 1195 if (vlen <= 0) continue; /* ignore header */ 1196 1197 if (http_header_strict) { 1198 const char * const x = http_request_check_line_strict(v, vlen); 1199 if (x) 1200 return http_request_header_char_invalid(r, *x, 1201 "invalid character in header -> 400"); 1202 } /* else URI already checked in http_request_parse_reqline() for any '\0' */ 1203 1204 int status = http_request_parse_single_header(r, id, k, (size_t)klen, v, (size_t)vlen); 1205 if (0 != status) return status; 1206 } 1207 1208 return 0; 1209 } 1210 1211 1212 static int 1213 http_request_parse (request_st * const restrict r, const int scheme_port) 1214 { 1215 int status = http_request_parse_target(r, scheme_port); 1216 if (0 != status) return status; 1217 1218 /* post-processing */ 1219 const unsigned int http_parseopts = r->conf.http_parseopts; 1220 1221 /* check hostname field if it is set */ 1222 /*(r->http_host might not be set until after parsing request headers)*/ 1223 if (__builtin_expect( (r->http_host != NULL), 1)) { 1224 if (0 != http_request_host_policy(r->http_host, 1225 http_parseopts, scheme_port)) 1226 return http_request_header_line_invalid(r, 400, "Invalid Hostname -> 400"); 1227 buffer_copy_buffer(&r->uri.authority, r->http_host); 1228 } 1229 else { 1230 buffer_copy_string_len(&r->uri.authority, CONST_STR_LEN("")); 1231 if (r->http_version >= HTTP_VERSION_1_1) 1232 return http_request_header_line_invalid(r, 400, "HTTP/1.1 but Host missing -> 400"); 1233 } 1234 1235 if (HTTP_VERSION_1_1 != r->http_version 1236 && (r->rqst_htags 1237 & (light_bshift(HTTP_HEADER_UPGRADE) 1238 |light_bshift(HTTP_HEADER_HTTP2_SETTINGS)))) { 1239 return http_request_header_line_invalid(r, 400, "invalid hop-by-hop header w/o HTTP/1.1 -> 400"); 1240 } 1241 1242 if (0 == r->reqbody_length) { 1243 /* POST requires Content-Length (or Transfer-Encoding) 1244 * (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1245 if (HTTP_METHOD_POST == r->http_method 1246 && !light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1247 return http_request_header_line_invalid(r, 411, "POST-request, but content-length missing -> 411"); 1248 } 1249 } 1250 else { 1251 /* (-1 == r->reqbody_length when Transfer-Encoding: chunked)*/ 1252 if (-1 == r->reqbody_length 1253 && light_btst(r->rqst_htags, HTTP_HEADER_CONTENT_LENGTH)) { 1254 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing 1255 * 3.3.3. Message Body Length 1256 * [...] 1257 * If a message is received with both a Transfer-Encoding and a 1258 * Content-Length header field, the Transfer-Encoding overrides the 1259 * Content-Length. Such a message might indicate an attempt to 1260 * perform request smuggling (Section 9.5) or response splitting 1261 * (Section 9.4) and ought to be handled as an error. A sender MUST 1262 * remove the received Content-Length field prior to forwarding such 1263 * a message downstream. 1264 */ 1265 const unsigned int http_header_strict = 1266 (http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); 1267 if (http_header_strict) { 1268 return http_request_header_line_invalid(r, 400, "invalid Transfer-Encoding + Content-Length -> 400"); 1269 } 1270 else { 1271 /* ignore Content-Length */ 1272 http_header_request_unset(r, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length")); 1273 } 1274 } 1275 if (http_method_get_or_head(r->http_method) 1276 && !(http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) { 1277 return http_request_header_line_invalid(r, 400, "GET/HEAD with content-length -> 400"); 1278 } 1279 } 1280 1281 return 0; 1282 } 1283 1284 1285 static int 1286 http_request_parse_hoff (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1287 { 1288 /* 1289 * Request: "^(GET|POST|HEAD|...) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" 1290 * Header : "^([-a-zA-Z]+): (.+)$" 1291 * End : "^$" 1292 */ 1293 1294 int status; 1295 const unsigned int http_parseopts = r->conf.http_parseopts; 1296 1297 status = http_request_parse_reqline(r, hdrs, hoff, http_parseopts); 1298 if (0 != status) return status; 1299 1300 status = http_request_parse_headers(r, hdrs, hoff, http_parseopts); 1301 if (0 != status) return status; 1302 1303 return http_request_parse(r, scheme_port); 1304 } 1305 1306 1307 static void 1308 http_request_headers_fin (request_st * const restrict r) 1309 { 1310 if (0 == r->http_status) { 1311 #if 0 1312 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET) 1313 | (1 << COMP_HTTP_SCHEME) 1314 | (1 << COMP_HTTP_HOST) 1315 | (1 << COMP_HTTP_REMOTE_IP) 1316 | (1 << COMP_HTTP_REQUEST_METHOD) 1317 | (1 << COMP_HTTP_URL) 1318 | (1 << COMP_HTTP_QUERY_STRING) 1319 | (1 << COMP_HTTP_REQUEST_HEADER); 1320 #else 1321 /* all config conditions are valid after parsing header 1322 * (set all bits; remove dependency on plugin_config.h) */ 1323 r->conditional_is_valid = ~0u; 1324 #endif 1325 } 1326 else { 1327 r->keep_alive = 0; 1328 r->reqbody_length = 0; 1329 } 1330 } 1331 1332 1333 void 1334 http_request_headers_process (request_st * const restrict r, char * const restrict hdrs, const unsigned short * const restrict hoff, const int scheme_port) 1335 { 1336 r->http_status = http_request_parse_hoff(r, hdrs, hoff, scheme_port); 1337 1338 http_request_headers_fin(r); 1339 1340 if (__builtin_expect( (0 != r->http_status), 0)) { 1341 if (r->conf.log_request_header_on_error) { 1342 /*(http_request_parse_headers() modifies hdrs only to 1343 * undo line-wrapping in-place using spaces)*/ 1344 log_error_multiline(r->conf.errh, __FILE__, __LINE__, 1345 hdrs, r->rqst_header_len, "rqst: "); 1346 } 1347 } 1348 } 1349 1350 1351 void 1352 http_request_headers_process_h2 (request_st * const restrict r, const int scheme_port) 1353 { 1354 if (0 == r->http_status) 1355 r->http_status = http_request_parse(r, scheme_port); 1356 1357 if (0 == r->http_status) { 1358 if (light_btst(r->rqst_htags, HTTP_HEADER_CONNECTION)) 1359 r->http_status = http_request_header_line_invalid(r, 400, 1360 "invalid Connection header with HTTP/2 -> 400"); 1361 } 1362 1363 http_request_headers_fin(r); 1364 1365 /* limited; headers not collected into a single buf for HTTP/2 */ 1366 if (__builtin_expect( (0 != r->http_status), 0)) { 1367 if (r->conf.log_request_header_on_error) { 1368 log_error(r->conf.errh, __FILE__, __LINE__, 1369 "request-header:\n:authority: %s\n:method: %s\n:path: %s", 1370 r->http_host ? r->http_host->ptr : "", 1371 http_method_buf(r->http_method)->ptr, 1372 !buffer_is_blank(&r->target) ? r->target.ptr : ""); 1373 } 1374 } 1375 1376 /* ignore Upgrade if using HTTP/2 */ 1377 if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)) 1378 http_header_request_unset(r, HTTP_HEADER_UPGRADE, 1379 CONST_STR_LEN("upgrade")); 1380 /* XXX: should filter out other hop-by-hop connection headers, too */ 1381 } 1382