1 /*
2 * http_range - HTTP Range (RFC 7233)
3 *
4 * Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com All rights reserved
5 * License: BSD 3-clause (same as lighttpd)
6 */
7 #include "http_range.h"
8
9 #include <limits.h>
10 #include <stdlib.h> /* strtol(), strtoll() */
11 #include <string.h> /* memmove() */
12
13 #include "buffer.h"
14 #include "chunk.h"
15 #include "http_header.h"
16 #include "request.h"
17
18 /* arbitrary limit for max num ranges (additional ranges are ignored) */
19 #undef RMAX
20 #define RMAX 10
21
22 /* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests
23 * https://tools.ietf.org/html/rfc7233
24 * Range requests are an OPTIONAL feature of HTTP, designed so that recipients
25 * not implementing this feature (or not supporting it for the target resource)
26 * can respond as if it is a normal GET request without impacting
27 * interoperability. */
28
29
30 /* default: ignore Range with HTTP/1.0 requests */
31 static int http_range_allow_http10;
http_range_config_allow_http10(int flag)32 void http_range_config_allow_http10 (int flag)
33 {
34 http_range_allow_http10 = flag;
35 }
36
37
38 __attribute_noinline__
39 static int
http_range_coalesce(off_t * const restrict ranges,int n)40 http_range_coalesce (off_t * const restrict ranges, int n)
41 {
42 /* coalesce/combine overlapping ranges and ranges separated by a
43 * gap which is smaller than the overhead of sending multiple parts
44 * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content)
45 * (ranges are known to be positive, so subtract 80 instead of add 80
46 * to avoid any chance of integer overflow)
47 * (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set
48 * of ranges has n^2 cost for this simplistic algorithm)
49 * (sorting the ranges and then combining would lower the cost, but the
50 * cost should not be an issue since client should not send many ranges
51 * and we restrict the max number of ranges to limit abuse)
52 * [RFC7233] 4.1 206 Partial Content recommends:
53 * When a multipart response payload is generated, the server SHOULD send
54 * the parts in the same order that the corresponding byte-range-spec
55 * appeared in the received Range header field, excluding those ranges
56 * that were deemed unsatisfiable or that were coalesced into other ranges
57 */
58 for (int i = 0; i+2 < n; i += 2) {
59 const off_t b = ranges[i];
60 const off_t e = ranges[i+1];
61 for (int j = i+2; j < n; j += 2) {
62 /* common case: ranges do not overlap */
63 if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80)
64 continue;
65 /* else ranges do overlap, so combine into first range */
66 ranges[i] = b <= ranges[j] ? b : ranges[j];
67 ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1];
68 memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
69 /* restart outer loop from beginning */
70 n -= 2;
71 i = -2;
72 break;
73 }
74 }
75
76 return n;
77 }
78
79
80 static const char *
http_range_parse_next(const char * restrict s,const off_t len,off_t * const restrict ranges)81 http_range_parse_next (const char * restrict s, const off_t len,
82 off_t * const restrict ranges)
83 {
84 /*(caller must check returned ranges[1] != -1, or else range was invalid)*/
85
86 /*assert(len > 0);*//*(caller must ensure len > 0)*/
87 char *e;
88 off_t n = strtoll(s, &e, 10);
89 ranges[1] = -1; /* invalid */
90 if (n >= 0) {
91 if (n != LLONG_MAX && n < len && s != e) {
92 ranges[0] = n;
93 while (*e == ' ' || *e == '\t') ++e;
94 if (*e == '-') {
95 n = strtoll((s = e+1), &e, 10);
96 if (s == e || (n == 0 && e[-1] != '0'))
97 ranges[1] = len-1;
98 else if (ranges[0] <= n && n != LLONG_MAX)
99 ranges[1] = n < len ? n : len-1;
100 }
101 }
102 }
103 else if (n != LLONG_MIN) {
104 ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/
105 ranges[1] = len-1;
106 }
107 while (*e == ' ' || *e == '\t') ++e;
108 return e; /* ',' or '\0' or else invalid char in range request */
109 }
110
111
112 static int
http_range_parse(const char * restrict s,const off_t content_length,off_t ranges[RMAX * 2])113 http_range_parse (const char * restrict s, const off_t content_length,
114 off_t ranges[RMAX*2])
115 {
116 /* [RFC7233] 2.1 Byte Ranges
117 * If a valid byte-range-set includes at least one byte-range-spec
118 * with a first-byte-pos that is less than the current length of
119 * the representation, or at least one suffix-byte-range-spec with
120 * a non-zero suffix-length, then the byte-range-set is satisfiable.
121 * Otherwise, the byte-range-set is unsatisfiable.
122 *
123 * [RFC7233] 3.1 Range
124 * A server that supports range requests MAY ignore or reject a Range
125 * header field that consists of more than two overlapping ranges, or a
126 * set of many small ranges that are not listed in ascending order,
127 * since both are indications of either a broken client or a deliberate
128 * denial-of-service attack (Section 6.1). A client SHOULD NOT request
129 * multiple ranges that are inherently less efficient to process and
130 * transfer than a single range that encompasses the same data.
131 */
132 int n = 0;
133 do {
134 s = http_range_parse_next(s, content_length, ranges+n);
135 if ((*s == '\0' || *s == ',') && ranges[n+1] != -1)
136 n += 2;
137 else
138 while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/
139 } while (*s++ != '\0' && n < RMAX*2);
140 /*(*s++ for multipart, increment to char after ',')*/
141
142 if (n <= 2)
143 return n;
144
145 /* error if n == 0 (no valid ranges)
146 * (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/
147 return http_range_coalesce(ranges, n);
148 }
149
150
151 static void
http_range_single(request_st * const r,const off_t ranges[2])152 http_range_single (request_st * const r, const off_t ranges[2])
153 {
154 /* caller already checked: n == 2, content_length > 0, ranges valid */
155 chunkqueue * const restrict cq = &r->write_queue;
156 const off_t complete_length = chunkqueue_length(cq);
157 /* add Content-Range header */
158 /*(large enough for "bytes X-X/X" with 3 huge numbers)*/
159 uint32_t len = sizeof("bytes ")-1;
160 char cr[72] = "bytes ";
161 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]);
162 cr[len++] = '-';
163 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]);
164 cr[len++] = '/';
165 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length);
166 http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
167 CONST_STR_LEN("Content-Range"), cr, len);
168 if (cq->first == cq->last) { /* single chunk in cq */
169 /* consume from cq to start of range, truncate after end of range */
170 if (ranges[0]) {
171 chunkqueue_mark_written(cq, ranges[0]);
172 cq->bytes_out -= ranges[0];
173 cq->bytes_in -= ranges[0];
174 }
175 cq->bytes_in -= complete_length - (ranges[1] + 1);
176 chunk * const c = cq->first;
177 if (c->type == FILE_CHUNK)
178 c->file.length = c->offset + ranges[1] - ranges[0] + 1;
179 else /*(c->type == MEM_CHUNK)*/
180 c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1;
181 }
182 else {
183 /* transfer contents to temporary cq, then transfer range back */
184 chunkqueue tq;
185 memset(&tq, 0, sizeof(tq));
186 chunkqueue_steal(&tq, cq, complete_length);
187 cq->bytes_out -= complete_length;
188 cq->bytes_in -= complete_length;
189 chunkqueue_mark_written(&tq, ranges[0]);
190 chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1);
191 chunkqueue_reset(&tq);
192 }
193 }
194
195
196 __attribute_cold__
197 static void
http_range_multi(request_st * const r,const off_t ranges[RMAX * 2],const int n)198 http_range_multi (request_st * const r,
199 const off_t ranges[RMAX*2], const int n)
200 {
201 /* multiple ranges that are not ordered are not expected to be common,
202 * so those scenarios is not optimized here */
203 #define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3"
204 static const char boundary_prefix[] =
205 "\r\n--" HTTP_MULTIPART_BOUNDARY;
206 static const char boundary_end[] =
207 "\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n";
208 static const char multipart_type[] =
209 "multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY;
210
211 buffer * const tb = r->tmp_buf;
212 buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix));
213 const buffer * const content_type =
214 http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
215 CONST_STR_LEN("Content-Type"));
216 if (content_type) {
217 buffer_append_str2(tb, CONST_STR_LEN("\r\nContent-Type: "),
218 BUF_PTR_LEN(content_type));
219 }
220 buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes "));
221 const uint32_t prefix_len = buffer_clen(tb);
222
223 http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
224 CONST_STR_LEN("Content-Type"),
225 CONST_STR_LEN(multipart_type));
226
227 /* caller already checked: n > 2, content_length > 0, ranges valid */
228 chunkqueue * const restrict cq = &r->write_queue;
229 const off_t complete_length = chunkqueue_length(cq);
230
231 /* copy chunks for ranges to end of cq, then consume original chunks
232 *
233 * future: if ranges ordered, could use technique in http_range_single(),
234 * but [RFC7233] 4.1 206 Partial Content recommends:
235 * When a multipart response payload is generated, the server SHOULD send
236 * the parts in the same order that the corresponding byte-range-spec
237 * appeared in the received Range header field, excluding those ranges
238 * that were deemed unsatisfiable or that were coalesced into other ranges
239 * and this code path is not expected to be hot, and so not optimized.
240 */
241
242 chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK)
243 ? cq->first
244 : NULL;
245 for (int i = 0; i < n; i += 2) {
246 /* generate boundary-header including Content-Type and Content-Range */
247 buffer_truncate(tb, prefix_len);
248 buffer_append_int(tb, ranges[i]);
249 buffer_append_char(tb, '-');
250 buffer_append_int(tb, ranges[i+1]);
251 buffer_append_char(tb, '/');
252 buffer_append_int(tb, complete_length);
253 buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n"));
254 if (c) /* single MEM_CHUNK in original cq; not using mem_min */
255 chunkqueue_append_mem(cq, BUF_PTR_LEN(tb));
256 else
257 chunkqueue_append_mem_min(cq, BUF_PTR_LEN(tb));
258
259 chunkqueue_append_cq_range(cq, cq, ranges[i],
260 ranges[i+1] - ranges[i] + 1);
261 }
262
263 /* add boundary end */
264 chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end));
265
266 /* remove initial chunk(s), since duplicated into multipart ranges */
267 /* remove initial "\r\n" in front of first boundary string */
268 chunkqueue_mark_written(cq, complete_length+2);
269 cq->bytes_out -= complete_length+2;
270 cq->bytes_in -= complete_length+2;
271 }
272
273
274 __attribute_cold__
275 static int
http_range_not_satisfiable(request_st * const r,const off_t content_length)276 http_range_not_satisfiable (request_st * const r, const off_t content_length)
277 {
278 /*(large enough for "bytes '*'/X" with 1 huge number)*/
279 uint32_t len = sizeof("bytes */")-1;
280 char cr[32] = "bytes */";
281 len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length);
282 http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
283 CONST_STR_LEN("Content-Range"), cr, len);
284 r->handler_module = NULL;
285 return (r->http_status = 416); /* Range Not Satisfiable */
286 }
287
288
289 __attribute_noinline__
290 static int
http_range_process(request_st * const r,const buffer * const http_range)291 http_range_process (request_st * const r, const buffer * const http_range)
292 {
293 const off_t content_length = chunkqueue_length(&r->write_queue);
294 if (0 == content_length) /*(implementation detail; see comment at top)*/
295 return r->http_status; /* skip Range handling if empty payload */
296 /* future: might skip Range if content_length is below threshold, e.g. 1k */
297
298 /* An origin server MUST ignore a Range header field that contains a
299 * range unit it does not understand. */
300 if (buffer_clen(http_range) < sizeof("bytes=")-1
301 || !buffer_eq_icase_ssn(http_range->ptr, "bytes=", sizeof("bytes=")-1))
302 return r->http_status; /* 200 OK */
303
304 /* arbitrary limit: support up to RMAX ranges in request Range field
305 * (validating and coalescing overlapping ranges is not a linear algorithm)
306 * (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */
307 off_t ranges[RMAX*2];
308 const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1,
309 content_length, ranges);
310
311 /* checked above: content_length > 0, ranges valid */
312 if (2 == n) /* single range */
313 http_range_single(r, ranges);
314 else if (0 == n) /* 416 Range Not Satisfiable */
315 return http_range_not_satisfiable(r, content_length);
316 else /* multipart ranges */
317 http_range_multi(r, ranges, n);
318
319 /*(must either set Content-Length or unset prior value, if any)*/
320 buffer_append_int(
321 http_header_response_set_ptr(r, HTTP_HEADER_CONTENT_LENGTH,
322 CONST_STR_LEN("Content-Length")),
323 chunkqueue_length(&r->write_queue));
324
325 return (r->http_status = 206); /* 206 Partial Content */
326 }
327
328
329 int
http_range_rfc7233(request_st * const r)330 http_range_rfc7233 (request_st * const r)
331 {
332 const int http_status = r->http_status;
333
334 /* implementation limitation:
335 * limit range handling to when we have complete response
336 * (future: might extend this to streamed files if Content-Length known)
337 * (otherwise, might be unable to validate Range before send response header
338 * e.g. unable to handle suffix-byte-range-spec without entity length)*/
339 if (!r->resp_body_finished)
340 return http_status;
341
342 /* limit range handling to 200 responses
343 * [RFC7233] 3.1 Range
344 * The Range header field is evaluated after evaluating the precondition
345 * header fields defined in [RFC7232], and only if the result in absence
346 * of the Range header field would be a 200 (OK) response. */
347 if (200 != http_status)
348 return http_status;
349 /* limit range handling to GET and HEAD (further limited below to GET)
350 * [RFC7233] 3.1 Range
351 * A server MUST ignore a Range header field received with a request
352 * method other than GET.
353 */
354 if (!http_method_get_or_head(r->http_method))
355 return http_status;
356 /* no "Range" in HTTP/1.0 */
357 if (r->http_version < HTTP_VERSION_1_1)
358 if (!http_range_allow_http10)
359 return http_status;
360 /* do not attempt to handle range if Transfer-Encoding already applied.
361 * skip Range processing if Content-Encoding has already been applied,
362 * since Range is on the unencoded content length and Content-Encoding
363 * might change content. This includes if mod_deflate has applied
364 * Content-Encoding. If Transfer-Encoding: gzip, chunked were used
365 * instead (not done), then Range processing could safely take place, too,
366 * (but mod_deflate would need to run from new hook to handle TE: gzip).
367 * Alternatively, lighttpd.conf could be configured to disable mod_deflate
368 * for Range requests:
369 * $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () }
370 */
371 if ((r->resp_htags
372 & (light_bshift(HTTP_HEADER_TRANSFER_ENCODING)
373 |light_bshift(HTTP_HEADER_CONTENT_ENCODING))))
374 return http_status;
375 #if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/
376 /* check if Range request already handled (single range or multipart)*/
377 if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE))
378 return http_status;
379 const buffer * const content_type =
380 http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
381 CONST_STR_LEN("Content-Type"));
382 if (content_type
383 && buffer_clen(content_type) >= sizeof("multipart/byteranges")-1
384 && buffer_eq_icase_ssn(content_type->ptr, "multipart/byteranges",
385 sizeof("multipart/byteranges")-1))
386 return http_status;
387 #endif
388
389 /* optional: advertise Accept-Ranges: bytes
390 * Even if "Accept-Ranges: bytes" is not given,
391 * [RFC7233] 2.3 Accept-Ranges
392 * A client MAY generate range requests without having received this
393 * header for the resource involved.
394 */
395 if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES))
396 http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES,
397 CONST_STR_LEN("Accept-Ranges"),
398 CONST_STR_LEN("bytes"));
399 else {
400 const buffer * const accept_ranges =
401 http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES,
402 CONST_STR_LEN("Accept-Ranges"));
403 #ifdef __COVERITY__
404 force_assert(accept_ranges); /*(r->resp_htags checked above)*/
405 #endif
406 if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none")))
407 return http_status;
408 }
409
410 /* limit range handling to GET
411 * [RFC7233] 3.1 Range
412 * A server MUST ignore a Range header field received with a request
413 * method other than GET.
414 */
415 if (r->http_method != HTTP_METHOD_GET)
416 return http_status;
417
418 /* check for Range request */
419 const buffer * const http_range =
420 http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range"));
421 if (!http_range)
422 return http_status;
423
424 /* [RFC7233] 3.2 If-Range
425 * If-Range = entity-tag / HTTP-date
426 * A client MUST NOT generate an If-Range header field containing an
427 * entity-tag that is marked as weak.
428 * [...]
429 * Note that this comparison by exact match, including when the validator
430 * is an HTTP-date, differs from the "earlier than or equal to" comparison
431 * used when evaluating an If-Unmodified-Since conditional. */
432 if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) {
433 const buffer * const if_range =
434 http_header_request_get(r, HTTP_HEADER_IF_RANGE,
435 CONST_STR_LEN("If-Range"));
436 #ifdef __COVERITY__
437 force_assert(if_range); /*(r->rqst_htags checked above)*/
438 #endif
439 /* (weak ETag W/"<etag>" will not match Last-Modified) */
440 const buffer * const cmp = (if_range->ptr[0] == '"')
441 ? http_header_response_get(r, HTTP_HEADER_ETAG,
442 CONST_STR_LEN("ETag"))
443 : http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED,
444 CONST_STR_LEN("Last-Modified"));
445 if (!cmp || !buffer_is_equal(if_range, cmp))
446 return http_status;
447
448 #if 0 /*(questionable utility; not deemed worthwhile)*/
449 /* (In a modular server, the following RFC recommendation might be
450 * expensive and invasive to implement perfectly, so only making an
451 * effort here to comply with known headers added within this routine
452 * and within the purview of Range requests)
453 * [RFC7233] 4.1 206 Partial Content
454 * If a 206 is generated in response to a request with an If-Range
455 * header field, the sender SHOULD NOT generate other representation
456 * header fields beyond those required above, because the client is
457 * understood to already have a prior response containing those header
458 * fields.
459 */
460 http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES,
461 CONST_STR_LEN("Accept-Ranges"));
462 #endif
463 }
464
465 return http_range_process(r, http_range);
466 }
467