1 /* 2 * http_range - HTTP Range (RFC 7233) 3 * 4 * Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com All rights reserved 5 * License: BSD 3-clause (same as lighttpd) 6 */ 7 #include "http_range.h" 8 9 #include <limits.h> 10 #include <stdlib.h> /* strtol(), strtoll() */ 11 #include <string.h> /* memmove() */ 12 13 #include "buffer.h" 14 #include "chunk.h" 15 #include "http_header.h" 16 #include "request.h" 17 18 /* arbitrary limit for max num ranges (additional ranges are ignored) */ 19 #undef RMAX 20 #define RMAX 10 21 22 /* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests 23 * https://tools.ietf.org/html/rfc7233 24 * Range requests are an OPTIONAL feature of HTTP, designed so that recipients 25 * not implementing this feature (or not supporting it for the target resource) 26 * can respond as if it is a normal GET request without impacting 27 * interoperability. */ 28 29 30 /* default: ignore Range with HTTP/1.0 requests */ 31 static int http_range_allow_http10; 32 void http_range_config_allow_http10 (int flag) 33 { 34 http_range_allow_http10 = flag; 35 } 36 37 38 __attribute_noinline__ 39 static int 40 http_range_coalesce (off_t * const restrict ranges, int n) 41 { 42 /* coalesce/combine overlapping ranges and ranges separated by a 43 * gap which is smaller than the overhead of sending multiple parts 44 * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content) 45 * (ranges are known to be positive, so subtract 80 instead of add 80 46 * to avoid any chance of integer overflow) 47 * (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set 48 * of ranges has n^2 cost for this simplistic algorithm) 49 * (sorting the ranges and then combining would lower the cost, but the 50 * cost should not be an issue since client should not send many ranges 51 * and we restrict the max number of ranges to limit abuse) 52 * [RFC7233] 4.1 206 Partial Content recommends: 53 * When a multipart response payload is generated, the server SHOULD send 54 * the parts in the same order that the corresponding byte-range-spec 55 * appeared in the received Range header field, excluding those ranges 56 * that were deemed unsatisfiable or that were coalesced into other ranges 57 */ 58 for (int i = 0; i+2 < n; i += 2) { 59 const off_t b = ranges[i]; 60 const off_t e = ranges[i+1]; 61 for (int j = i+2; j < n; j += 2) { 62 /* common case: ranges do not overlap */ 63 if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80) 64 continue; 65 /* else ranges do overlap, so combine into first range */ 66 ranges[i] = b <= ranges[j] ? b : ranges[j]; 67 ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1]; 68 memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t)); 69 /* restart outer loop from beginning */ 70 n -= 2; 71 i = -2; 72 break; 73 } 74 } 75 76 return n; 77 } 78 79 80 static const char * 81 http_range_parse_next (const char * restrict s, const off_t len, 82 off_t * const restrict ranges) 83 { 84 /*(caller must check retured ranges[1] != -1, or else range was invalid)*/ 85 86 /*assert(len > 0);*//*(caller must ensure len > 0)*/ 87 char *e; 88 off_t n = strtoll(s, &e, 10); 89 ranges[1] = -1; /* invalid */ 90 if (n >= 0) { 91 if (n != LLONG_MAX && n < len && s != e) { 92 ranges[0] = n; 93 while (*e == ' ' || *e == '\t') ++e; 94 if (*e == '-') { 95 n = strtoll((s = e+1), &e, 10); 96 if (s == e || (n == 0 && e[-1] != '0')) 97 ranges[1] = len-1; 98 else if (ranges[0] <= n && n != LLONG_MAX) 99 ranges[1] = n < len ? n : len-1; 100 } 101 } 102 } 103 else if (n != LLONG_MIN) { 104 ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/ 105 ranges[1] = len-1; 106 } 107 while (*e == ' ' || *e == '\t') ++e; 108 return e; /* ',' or '\0' or else invalid char in range request */ 109 } 110 111 112 static int 113 http_range_parse (const char * restrict s, const off_t content_length, 114 off_t ranges[RMAX*2]) 115 { 116 /* [RFC7233] 2.1 Byte Ranges 117 * If a valid byte-range-set includes at least one byte-range-spec 118 * with a first-byte-pos that is less than the current length of 119 * the representation, or at least one suffix-byte-range-spec with 120 * a non-zero suffix-length, then the byte-range-set is satisfiable. 121 * Otherwise, the byte-range-set is unsatisfiable. 122 * 123 * [RFC7233] 3.1 Range 124 * A server that supports range requests MAY ignore or reject a Range 125 * header field that consists of more than two overlapping ranges, or a 126 * set of many small ranges that are not listed in ascending order, 127 * since both are indications of either a broken client or a deliberate 128 * denial-of-service attack (Section 6.1). A client SHOULD NOT request 129 * multiple ranges that are inherently less efficient to process and 130 * transfer than a single range that encompasses the same data. 131 */ 132 int n = 0; 133 do { 134 s = http_range_parse_next(s, content_length, ranges+n); 135 if ((*s == '\0' || *s == ',') && ranges[n+1] != -1) 136 n += 2; 137 else 138 while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/ 139 } while (*s++ != '\0' && n < RMAX*2); 140 /*(*s++ for multipart, increment to char after ',')*/ 141 142 if (n <= 2) 143 return n; 144 145 /* error if n == 0 (no valid ranges) 146 * (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/ 147 return http_range_coalesce(ranges, n); 148 } 149 150 151 static void 152 http_range_single (request_st * const r, const off_t ranges[2]) 153 { 154 /* caller already checked: n == 2, content_length > 0, ranges valid */ 155 chunkqueue * const restrict cq = &r->write_queue; 156 const off_t complete_length = chunkqueue_length(cq); 157 /* add Content-Range header */ 158 /*(large enough for "bytes X-X/X" with 3 huge numbers)*/ 159 uint32_t len = sizeof("bytes ")-1; 160 char cr[72] = "bytes "; 161 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]); 162 cr[len++] = '-'; 163 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]); 164 cr[len++] = '/'; 165 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length); 166 http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE, 167 CONST_STR_LEN("Content-Range"), cr, len); 168 if (cq->first == cq->last) { /* single chunk in cq */ 169 /* consume from cq to start of range, truncate after end of range */ 170 if (ranges[0]) { 171 chunkqueue_mark_written(cq, ranges[0]); 172 cq->bytes_out -= ranges[0]; 173 cq->bytes_in -= ranges[0]; 174 } 175 cq->bytes_in -= complete_length - (ranges[1] + 1); 176 chunk * const c = cq->first; 177 if (c->type == FILE_CHUNK) 178 c->file.length = c->offset + ranges[1] - ranges[0] + 1; 179 else /*(c->type == MEM_CHUNK)*/ 180 c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1; 181 } 182 else { 183 /* transfer contents to temporary cq, then transfer range back */ 184 chunkqueue tq; 185 memset(&tq, 0, sizeof(tq)); 186 chunkqueue_steal(&tq, cq, complete_length); 187 cq->bytes_out -= complete_length; 188 cq->bytes_in -= complete_length; 189 chunkqueue_mark_written(&tq, ranges[0]); 190 chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1); 191 chunkqueue_reset(&tq); 192 } 193 } 194 195 196 __attribute_cold__ 197 static void 198 http_range_multi (request_st * const r, 199 const off_t ranges[RMAX*2], const int n) 200 { 201 /* multiple ranges that are not ordered are not expected to be common, 202 * so those scenarios is not optimized here */ 203 #define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3" 204 static const char boundary_prefix[] = 205 "\r\n--" HTTP_MULTIPART_BOUNDARY; 206 static const char boundary_end[] = 207 "\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n"; 208 static const char multipart_type[] = 209 "multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY; 210 211 buffer * const tb = r->tmp_buf; 212 buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix)); 213 const buffer * const content_type = 214 http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE, 215 CONST_STR_LEN("Content-Type")); 216 if (content_type) { 217 buffer_append_str2(tb, CONST_STR_LEN("\r\nContent-Type: "), 218 BUF_PTR_LEN(content_type)); 219 } 220 buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes ")); 221 const uint32_t prefix_len = buffer_clen(tb); 222 223 http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE, 224 CONST_STR_LEN("Content-Type"), 225 CONST_STR_LEN(multipart_type)); 226 227 /* caller already checked: n > 2, content_length > 0, ranges valid */ 228 chunkqueue * const restrict cq = &r->write_queue; 229 const off_t complete_length = chunkqueue_length(cq); 230 231 /* copy chunks for ranges to end of cq, then consume original chunks 232 * 233 * future: if ranges ordered, could use technique in http_range_single(), 234 * but [RFC7233] 4.1 206 Partial Content recommends: 235 * When a multipart response payload is generated, the server SHOULD send 236 * the parts in the same order that the corresponding byte-range-spec 237 * appeared in the received Range header field, excluding those ranges 238 * that were deemed unsatisfiable or that were coalesced into other ranges 239 * and this code path is not expected to be hot, and so not optimized. 240 */ 241 242 chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK) 243 ? cq->first 244 : NULL; 245 for (int i = 0; i < n; i += 2) { 246 /* generate boundary-header including Content-Type and Content-Range */ 247 buffer_truncate(tb, prefix_len); 248 buffer_append_int(tb, ranges[i]); 249 buffer_append_string_len(tb, CONST_STR_LEN("-")); 250 buffer_append_int(tb, ranges[i+1]); 251 buffer_append_string_len(tb, CONST_STR_LEN("/")); 252 buffer_append_int(tb, complete_length); 253 buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n")); 254 if (c) /* single MEM_CHUNK in original cq; not using mem_min */ 255 chunkqueue_append_mem(cq, BUF_PTR_LEN(tb)); 256 else 257 chunkqueue_append_mem_min(cq, BUF_PTR_LEN(tb)); 258 259 chunkqueue_append_cq_range(cq, cq, ranges[i], 260 ranges[i+1] - ranges[i] + 1); 261 } 262 263 /* add boundary end */ 264 chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end)); 265 266 /* remove initial chunk(s), since duplicated into multipart ranges */ 267 /* remove initial "\r\n" in front of first boundary string */ 268 chunkqueue_mark_written(cq, complete_length+2); 269 cq->bytes_out -= complete_length+2; 270 cq->bytes_in -= complete_length+2; 271 } 272 273 274 __attribute_cold__ 275 static int 276 http_range_not_satisfiable (request_st * const r, const off_t content_length) 277 { 278 /*(large enough for "bytes '*'/X" with 1 huge number)*/ 279 uint32_t len = sizeof("bytes */")-1; 280 char cr[32] = "bytes */"; 281 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length); 282 http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE, 283 CONST_STR_LEN("Content-Range"), cr, len); 284 r->handler_module = NULL; 285 return (r->http_status = 416); /* Range Not Satisfiable */ 286 } 287 288 289 __attribute_noinline__ 290 static int 291 http_range_process (request_st * const r, const buffer * const http_range) 292 { 293 const off_t content_length = chunkqueue_length(&r->write_queue); 294 if (0 == content_length) /*(implementation detail; see comment at top)*/ 295 return r->http_status; /* skip Range handling if empty payload */ 296 /* future: might skip Range if content_length is below threshold, e.g. 1k */ 297 298 /* An origin server MUST ignore a Range header field that contains a 299 * range unit it does not understand. */ 300 if (buffer_clen(http_range) < sizeof("bytes=")-1 301 || !buffer_eq_icase_ssn(http_range->ptr, "bytes=", sizeof("bytes=")-1)) 302 return r->http_status; /* 200 OK */ 303 304 /* arbitrary limit: support up to RMAX ranges in request Range field 305 * (validating and coalescing overlapping ranges is not a linear algorithm) 306 * (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */ 307 off_t ranges[RMAX*2]; 308 const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1, 309 content_length, ranges); 310 311 /* checked above: content_length > 0, ranges valid */ 312 if (2 == n) /* single range */ 313 http_range_single(r, ranges); 314 else if (0 == n) /* 416 Range Not Satisfiable */ 315 return http_range_not_satisfiable(r, content_length); 316 else /* multipart ranges */ 317 http_range_multi(r, ranges, n); 318 319 /*(must either set Content-Length or unset prior value, if any)*/ 320 buffer_append_int( 321 http_header_response_set_ptr(r, HTTP_HEADER_CONTENT_LENGTH, 322 CONST_STR_LEN("Content-Length")), 323 chunkqueue_length(&r->write_queue)); 324 325 return (r->http_status = 206); /* 206 Partial Content */ 326 } 327 328 329 int 330 http_range_rfc7233 (request_st * const r) 331 { 332 const int http_status = r->http_status; 333 334 /* implementation limitation: 335 * limit range handling to when we have complete response 336 * (future: might extend this to streamed files if Content-Length known) 337 * (otherwise, might be unable to validate Range before send response header 338 * e.g. unable to handle suffix-byte-range-spec without entity length)*/ 339 if (!r->resp_body_finished) 340 return http_status; 341 342 /* limit range handling to 200 responses 343 * [RFC7233] 3.1 Range 344 * The Range header field is evaluated after evaluating the precondition 345 * header fields defined in [RFC7232], and only if the result in absence 346 * of the Range header field would be a 200 (OK) response. */ 347 if (200 != http_status) 348 return http_status; 349 /* limit range handling to GET and HEAD (further limited below to GET) 350 * [RFC7233] 3.1 Range 351 * A server MUST ignore a Range header field received with a request 352 * method other than GET. 353 */ 354 if (!http_method_get_or_head(r->http_method)) 355 return http_status; 356 /* no "Range" in HTTP/1.0 */ 357 if (r->http_version < HTTP_VERSION_1_1) 358 if (!http_range_allow_http10) 359 return http_status; 360 /* do not attempt to handle range if Transfer-Encoding already applied. 361 * skip Range processing if Content-Encoding has already been applied, 362 * since Range is on the unencoded content length and Content-Encoding 363 * might change content. This includes if mod_deflate has applied 364 * Content-Encoding. If Transfer-Encoding: gzip, chunked were used 365 * instead (not done), then Range processing could safely take place, too, 366 * (but mod_deflate would need to run from new hook to handle TE: gzip). 367 * Alternatively, lighttpd.conf could be configured to disable mod_deflate 368 * for Range requests: 369 * $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () } 370 */ 371 if ((r->resp_htags 372 & (light_bshift(HTTP_HEADER_TRANSFER_ENCODING) 373 |light_bshift(HTTP_HEADER_CONTENT_ENCODING)))) 374 return http_status; 375 #if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/ 376 /* check if Range request already handled (single range or multipart)*/ 377 if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE)) 378 return http_status; 379 const buffer * const content_type = 380 http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE, 381 CONST_STR_LEN("Content-Type")); 382 if (content_type 383 && buffer_clen(content_type) >= sizeof("multipart/byteranges")-1 384 && buffer_eq_icase_ssn(content_type->ptr, "multipart/byteranges", 385 sizeof("multipart/byteranges")-1)) 386 return http_status; 387 #endif 388 389 /* optional: advertise Accept-Ranges: bytes 390 * Even if "Accept-Ranges: bytes" is not given, 391 * [RFC7233] 2.3 Accept-Ranges 392 * A client MAY generate range requests without having received this 393 * header for the resource involved. 394 */ 395 if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES)) 396 http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES, 397 CONST_STR_LEN("Accept-Ranges"), 398 CONST_STR_LEN("bytes")); 399 else { 400 const buffer * const accept_ranges = 401 http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES, 402 CONST_STR_LEN("Accept-Ranges")); 403 #ifdef __COVERITY__ 404 force_assert(accept_ranges); /*(r->resp_htags checked above)*/ 405 #endif 406 if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none"))) 407 return http_status; 408 } 409 410 /* limit range handling to GET 411 * [RFC7233] 3.1 Range 412 * A server MUST ignore a Range header field received with a request 413 * method other than GET. 414 */ 415 if (r->http_method != HTTP_METHOD_GET) 416 return http_status; 417 418 /* check for Range request */ 419 const buffer * const http_range = 420 http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range")); 421 if (!http_range) 422 return http_status; 423 424 /* [RFC7233] 3.2 If-Range 425 * If-Range = entity-tag / HTTP-date 426 * A client MUST NOT generate an If-Range header field containing an 427 * entity-tag that is marked as weak. 428 * [...] 429 * Note that this comparison by exact match, including when the validator 430 * is an HTTP-date, differs from the "earlier than or equal to" comparison 431 * used when evaluating an If-Unmodified-Since conditional. */ 432 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) { 433 const buffer * const if_range = 434 http_header_request_get(r, HTTP_HEADER_IF_RANGE, 435 CONST_STR_LEN("If-Range")); 436 #ifdef __COVERITY__ 437 force_assert(if_range); /*(r->rqst_htags checked above)*/ 438 #endif 439 /* (weak ETag W/"<etag>" will not match Last-Modified) */ 440 const buffer * const cmp = (if_range->ptr[0] == '"') 441 ? http_header_response_get(r, HTTP_HEADER_ETAG, 442 CONST_STR_LEN("ETag")) 443 : http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED, 444 CONST_STR_LEN("Last-Modified")); 445 if (!cmp || !buffer_is_equal(if_range, cmp)) 446 return http_status; 447 448 #if 0 /*(questionable utility; not deemed worthwhile)*/ 449 /* (In a modular server, the following RFC recommentation might be 450 * expensive and invasive to implement perfectly, so only making an 451 * effort here to comply with known headers added within this routine 452 * and within the purview of Range requests) 453 * [RFC7233] 4.1 206 Partial Content 454 * If a 206 is generated in response to a request with an If-Range 455 * header field, the sender SHOULD NOT generate other representation 456 * header fields beyond those required above, because the client is 457 * understood to already have a prior response containing those header 458 * fields. 459 */ 460 http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES, 461 CONST_STR_LEN("Accept-Ranges")); 462 #endif 463 } 464 465 return http_range_process(r, http_range); 466 } 467