xref: /lighttpd1.4/src/http_range.c (revision 85b5988d)
1 /*
2  * http_range - HTTP Range (RFC 7233)
3  *
4  * Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
5  * License: BSD 3-clause (same as lighttpd)
6  */
7 #include "http_range.h"
8 
9 #include <limits.h>
10 #include <stdlib.h>   /* strtol(), strtoll() */
11 #include <string.h>   /* memmove() */
12 
13 #include "buffer.h"
14 #include "chunk.h"
15 #include "http_header.h"
16 #include "request.h"
17 
18 /* arbitrary limit for max num ranges (additional ranges are ignored) */
19 #undef  RMAX
20 #define RMAX 10
21 
22 /* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests
23  * https://tools.ietf.org/html/rfc7233
24  * Range requests are an OPTIONAL feature of HTTP, designed so that recipients
25  * not implementing this feature (or not supporting it for the target resource)
26  * can respond as if it is a normal GET request without impacting
27  * interoperability. */
28 
29 
30 /* default: ignore Range with HTTP/1.0 requests */
31 static int http_range_allow_http10;
32 void http_range_config_allow_http10 (int flag)
33 {
34     http_range_allow_http10 = flag;
35 }
36 
37 
38 __attribute_noinline__
39 static int
40 http_range_coalesce (off_t * const restrict ranges, int n)
41 {
42     /* coalesce/combine overlapping ranges and ranges separated by a
43      * gap which is smaller than the overhead of sending multiple parts
44      * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content)
45      * (ranges are known to be positive, so subtract 80 instead of add 80
46      *  to avoid any chance of integer overflow)
47      * (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set
48      *  of ranges has n^2 cost for this simplistic algorithm)
49      * (sorting the ranges and then combining would lower the cost, but the
50      *  cost should not be an issue since client should not send many ranges
51      *  and we restrict the max number of ranges to limit abuse)
52      * [RFC7233] 4.1 206 Partial Content recommends:
53      *   When a multipart response payload is generated, the server SHOULD send
54      *   the parts in the same order that the corresponding byte-range-spec
55      *   appeared in the received Range header field, excluding those ranges
56      *   that were deemed unsatisfiable or that were coalesced into other ranges
57      */
58     for (int i = 0; i+2 < n; i += 2) {
59         const off_t b = ranges[i];
60         const off_t e = ranges[i+1];
61         for (int j = i+2; j < n; j += 2) {
62             /* common case: ranges do not overlap */
63             if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80)
64                 continue;
65             /* else ranges do overlap, so combine into first range */
66             ranges[i]   = b <= ranges[j]   ? b : ranges[j];
67             ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1];
68             memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
69             /* restart outer loop from beginning */
70             n -= 2;
71             i = -2;
72             break;
73         }
74     }
75 
76     return n;
77 }
78 
79 
80 static const char *
81 http_range_parse_next (const char * restrict s, const off_t len,
82                        off_t * const restrict ranges)
83 {
84     /*(caller must check retured ranges[1] != -1, or else range was invalid)*/
85 
86     /*assert(len > 0);*//*(caller must ensure len > 0)*/
87     char *e;
88     off_t n = strtoll(s, &e, 10);
89     ranges[1] = -1; /* invalid */
90     if (n >= 0) {
91         if (n != LLONG_MAX && n < len && s != e) {
92             ranges[0] = n;
93             while (*e == ' ' || *e == '\t') ++e;
94             if (*e == '-') {
95                 n = strtoll((s = e+1), &e, 10);
96                 if (s == e || (n == 0 && e[-1] != '0'))
97                     ranges[1] = len-1;
98                 else if (ranges[0] <= n && n != LLONG_MAX)
99                     ranges[1] = n < len ? n : len-1;
100             }
101         }
102     }
103     else if (n != LLONG_MIN) {
104         ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/
105         ranges[1] = len-1;
106     }
107     while (*e == ' ' || *e == '\t') ++e;
108     return e;  /* ',' or '\0' or else invalid char in range request */
109 }
110 
111 
112 static int
113 http_range_parse (const char * restrict s, const off_t content_length,
114                   off_t ranges[RMAX*2])
115 {
116     /* [RFC7233] 2.1 Byte Ranges
117      * If a valid byte-range-set includes at least one byte-range-spec
118      * with a first-byte-pos that is less than the current length of
119      * the representation, or at least one suffix-byte-range-spec with
120      * a non-zero suffix-length, then the byte-range-set is satisfiable.
121      * Otherwise, the byte-range-set is unsatisfiable.
122      *
123      * [RFC7233] 3.1 Range
124      * A server that supports range requests MAY ignore or reject a Range
125      * header field that consists of more than two overlapping ranges, or a
126      * set of many small ranges that are not listed in ascending order,
127      * since both are indications of either a broken client or a deliberate
128      * denial-of-service attack (Section 6.1).  A client SHOULD NOT request
129      * multiple ranges that are inherently less efficient to process and
130      * transfer than a single range that encompasses the same data.
131      */
132     int n = 0;
133     do {
134         s = http_range_parse_next(s, content_length, ranges+n);
135         if ((*s == '\0' || *s == ',') && ranges[n+1] != -1)
136             n += 2;
137         else
138             while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/
139     } while (*s++ != '\0' && n < RMAX*2);
140           /*(*s++ for multipart, increment to char after ',')*/
141 
142     if (n <= 2)
143         return n;
144 
145     /* error if n == 0 (no valid ranges)
146      * (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/
147     return http_range_coalesce(ranges, n);
148 }
149 
150 
151 static void
152 http_range_single (request_st * const r, const off_t ranges[2])
153 {
154     /* caller already checked: n == 2, content_length > 0, ranges valid */
155     chunkqueue * const restrict cq = &r->write_queue;
156     const off_t complete_length = chunkqueue_length(cq);
157     /* add Content-Range header */
158     /*(large enough for "bytes X-X/X" with 3 huge numbers)*/
159     uint32_t len = sizeof("bytes ")-1;
160     char cr[72] = "bytes ";
161     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]);
162     cr[len++] = '-';
163     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]);
164     cr[len++] = '/';
165     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length);
166     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
167                              CONST_STR_LEN("Content-Range"), cr, len);
168     if (cq->first == cq->last) { /* single chunk in cq */
169         /* consume from cq to start of range, truncate after end of range */
170         if (ranges[0]) {
171             chunkqueue_mark_written(cq, ranges[0]);
172             cq->bytes_out -= ranges[0];
173             cq->bytes_in -= ranges[0];
174         }
175         cq->bytes_in -= complete_length - (ranges[1] + 1);
176         chunk * const c = cq->first;
177         if (c->type == FILE_CHUNK)
178             c->file.length = c->offset + ranges[1] - ranges[0] + 1;
179         else /*(c->type == MEM_CHUNK)*/
180             c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1;
181     }
182     else {
183         /* transfer contents to temporary cq, then transfer range back */
184         chunkqueue tq;
185         memset(&tq, 0, sizeof(tq));
186         chunkqueue_steal(&tq, cq, complete_length);
187         cq->bytes_out -= complete_length;
188         cq->bytes_in -= complete_length;
189         chunkqueue_mark_written(&tq, ranges[0]);
190         chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1);
191         chunkqueue_reset(&tq);
192     }
193 }
194 
195 
196 __attribute_cold__
197 static void
198 http_range_multi (request_st * const r,
199                   const off_t ranges[RMAX*2], const int n)
200 {
201     /* multiple ranges that are not ordered are not expected to be common,
202      * so those scenarios is not optimized here */
203     #define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3"
204     static const char boundary_prefix[] =
205       "\r\n--" HTTP_MULTIPART_BOUNDARY;
206     static const char boundary_end[] =
207       "\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n";
208     static const char multipart_type[] =
209       "multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY;
210 
211     buffer * const tb = r->tmp_buf;
212     buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix));
213     const buffer * const content_type =
214       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
215                                CONST_STR_LEN("Content-Type"));
216     if (content_type) {
217         buffer_append_str2(tb, CONST_STR_LEN("\r\nContent-Type: "),
218                                BUF_PTR_LEN(content_type));
219     }
220     buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes "));
221     const uint32_t prefix_len = buffer_clen(tb);
222 
223     http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
224                              CONST_STR_LEN("Content-Type"),
225                              CONST_STR_LEN(multipart_type));
226 
227     /* caller already checked: n > 2, content_length > 0, ranges valid */
228     chunkqueue * const restrict cq = &r->write_queue;
229     const off_t complete_length = chunkqueue_length(cq);
230 
231     /* copy chunks for ranges to end of cq, then consume original chunks
232      *
233      * future: if ranges ordered, could use technique in http_range_single(),
234      * but [RFC7233] 4.1 206 Partial Content recommends:
235      *   When a multipart response payload is generated, the server SHOULD send
236      *   the parts in the same order that the corresponding byte-range-spec
237      *   appeared in the received Range header field, excluding those ranges
238      *   that were deemed unsatisfiable or that were coalesced into other ranges
239      * and this code path is not expected to be hot, and so not optimized.
240      */
241 
242     chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK)
243       ? cq->first
244       : NULL;
245     for (int i = 0; i < n; i += 2) {
246         /* generate boundary-header including Content-Type and Content-Range */
247         buffer_truncate(tb, prefix_len);
248         buffer_append_int(tb, ranges[i]);
249         buffer_append_char(tb, '-');
250         buffer_append_int(tb, ranges[i+1]);
251         buffer_append_char(tb, '/');
252         buffer_append_int(tb, complete_length);
253         buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n"));
254         if (c) /* single MEM_CHUNK in original cq; not using mem_min */
255             chunkqueue_append_mem(cq, BUF_PTR_LEN(tb));
256         else
257             chunkqueue_append_mem_min(cq, BUF_PTR_LEN(tb));
258 
259         chunkqueue_append_cq_range(cq, cq, ranges[i],
260                                    ranges[i+1] - ranges[i] + 1);
261     }
262 
263     /* add boundary end */
264     chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end));
265 
266     /* remove initial chunk(s), since duplicated into multipart ranges */
267     /* remove initial "\r\n" in front of first boundary string */
268     chunkqueue_mark_written(cq, complete_length+2);
269     cq->bytes_out -= complete_length+2;
270     cq->bytes_in -= complete_length+2;
271 }
272 
273 
274 __attribute_cold__
275 static int
276 http_range_not_satisfiable (request_st * const r, const off_t content_length)
277 {
278     /*(large enough for "bytes '*'/X" with 1 huge number)*/
279     uint32_t len = sizeof("bytes */")-1;
280     char cr[32] = "bytes */";
281     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length);
282     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
283                              CONST_STR_LEN("Content-Range"), cr, len);
284     r->handler_module = NULL;
285     return (r->http_status = 416); /* Range Not Satisfiable */
286 }
287 
288 
289 __attribute_noinline__
290 static int
291 http_range_process (request_st * const r, const buffer * const http_range)
292 {
293     const off_t content_length = chunkqueue_length(&r->write_queue);
294     if (0 == content_length) /*(implementation detail; see comment at top)*/
295         return r->http_status;  /* skip Range handling if empty payload */
296     /* future: might skip Range if content_length is below threshold, e.g. 1k */
297 
298     /* An origin server MUST ignore a Range header field that contains a
299      * range unit it does not understand. */
300     if (buffer_clen(http_range) < sizeof("bytes=")-1
301         || !buffer_eq_icase_ssn(http_range->ptr, "bytes=", sizeof("bytes=")-1))
302         return r->http_status;         /* 200 OK */
303 
304     /* arbitrary limit: support up to RMAX ranges in request Range field
305      * (validating and coalescing overlapping ranges is not a linear algorithm)
306      * (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */
307     off_t ranges[RMAX*2];
308     const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1,
309                                    content_length, ranges);
310 
311     /* checked above: content_length > 0, ranges valid */
312     if (2 == n) /* single range */
313         http_range_single(r, ranges);
314     else if (0 == n)                   /* 416 Range Not Satisfiable */
315         return http_range_not_satisfiable(r, content_length);
316     else        /* multipart ranges */
317         http_range_multi(r, ranges, n);
318 
319     /*(must either set Content-Length or unset prior value, if any)*/
320     buffer_append_int(
321       http_header_response_set_ptr(r, HTTP_HEADER_CONTENT_LENGTH,
322                                    CONST_STR_LEN("Content-Length")),
323       chunkqueue_length(&r->write_queue));
324 
325     return (r->http_status = 206);     /* 206 Partial Content */
326 }
327 
328 
329 int
330 http_range_rfc7233 (request_st * const r)
331 {
332     const int http_status = r->http_status;
333 
334     /* implementation limitation:
335      * limit range handling to when we have complete response
336      * (future: might extend this to streamed files if Content-Length known)
337      * (otherwise, might be unable to validate Range before send response header
338      *  e.g. unable to handle suffix-byte-range-spec without entity length)*/
339     if (!r->resp_body_finished)
340         return http_status;
341 
342     /* limit range handling to 200 responses
343      * [RFC7233] 3.1 Range
344      *   The Range header field is evaluated after evaluating the precondition
345      *   header fields defined in [RFC7232], and only if the result in absence
346      *   of the Range header field would be a 200 (OK) response. */
347     if (200 != http_status)
348         return http_status;
349     /* limit range handling to GET and HEAD (further limited below to GET)
350      * [RFC7233] 3.1 Range
351      *   A server MUST ignore a Range header field received with a request
352      *   method other than GET.
353      */
354     if (!http_method_get_or_head(r->http_method))
355         return http_status;
356     /* no "Range" in HTTP/1.0 */
357     if (r->http_version < HTTP_VERSION_1_1)
358       if (!http_range_allow_http10)
359         return http_status;
360     /* do not attempt to handle range if Transfer-Encoding already applied.
361      * skip Range processing if Content-Encoding has already been applied,
362      * since Range is on the unencoded content length and Content-Encoding
363      * might change content.  This includes if mod_deflate has applied
364      * Content-Encoding.  If Transfer-Encoding: gzip, chunked were used
365      * instead (not done), then Range processing could safely take place, too,
366      * (but mod_deflate would need to run from new hook to handle TE: gzip).
367      * Alternatively, lighttpd.conf could be configured to disable mod_deflate
368      * for Range requests:
369      *   $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () }
370      */
371     if ((r->resp_htags
372          & (light_bshift(HTTP_HEADER_TRANSFER_ENCODING)
373            |light_bshift(HTTP_HEADER_CONTENT_ENCODING))))
374         return http_status;
375   #if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/
376     /* check if Range request already handled (single range or multipart)*/
377     if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE))
378         return http_status;
379     const buffer * const content_type =
380       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
381                                CONST_STR_LEN("Content-Type"));
382     if (content_type
383         && buffer_clen(content_type) >= sizeof("multipart/byteranges")-1
384         && buffer_eq_icase_ssn(content_type->ptr, "multipart/byteranges",
385                                sizeof("multipart/byteranges")-1))
386         return http_status;
387   #endif
388 
389     /* optional: advertise Accept-Ranges: bytes
390      * Even if "Accept-Ranges: bytes" is not given,
391      * [RFC7233] 2.3 Accept-Ranges
392      *   A client MAY generate range requests without having received this
393      *   header for the resource involved.
394      */
395     if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES))
396         http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES,
397                                  CONST_STR_LEN("Accept-Ranges"),
398                                  CONST_STR_LEN("bytes"));
399     else {
400         const buffer * const accept_ranges =
401           http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES,
402                                    CONST_STR_LEN("Accept-Ranges"));
403       #ifdef __COVERITY__
404         force_assert(accept_ranges); /*(r->resp_htags checked above)*/
405       #endif
406         if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none")))
407             return http_status;
408     }
409 
410     /* limit range handling to GET
411      * [RFC7233] 3.1 Range
412      *   A server MUST ignore a Range header field received with a request
413      *   method other than GET.
414      */
415     if (r->http_method != HTTP_METHOD_GET)
416         return http_status;
417 
418     /* check for Range request */
419     const buffer * const http_range =
420       http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range"));
421     if (!http_range)
422         return http_status;
423 
424     /* [RFC7233] 3.2 If-Range
425      *   If-Range = entity-tag / HTTP-date
426      *   A client MUST NOT generate an If-Range header field containing an
427      *   entity-tag that is marked as weak.
428      *   [...]
429      *   Note that this comparison by exact match, including when the validator
430      *   is an HTTP-date, differs from the "earlier than or equal to" comparison
431      *   used when evaluating an If-Unmodified-Since conditional. */
432     if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) {
433         const buffer * const if_range =
434           http_header_request_get(r, HTTP_HEADER_IF_RANGE,
435                                   CONST_STR_LEN("If-Range"));
436       #ifdef __COVERITY__
437         force_assert(if_range); /*(r->rqst_htags checked above)*/
438       #endif
439         /* (weak ETag W/"<etag>" will not match Last-Modified) */
440         const buffer * const cmp = (if_range->ptr[0] == '"')
441           ? http_header_response_get(r, HTTP_HEADER_ETAG,
442                                      CONST_STR_LEN("ETag"))
443           : http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED,
444                                      CONST_STR_LEN("Last-Modified"));
445         if (!cmp || !buffer_is_equal(if_range, cmp))
446             return http_status;
447 
448       #if 0 /*(questionable utility; not deemed worthwhile)*/
449         /* (In a modular server, the following RFC recommentation might be
450          *  expensive and invasive to implement perfectly, so only making an
451          *  effort here to comply with known headers added within this routine
452          *  and within the purview of Range requests)
453          * [RFC7233] 4.1 206 Partial Content
454          *   If a 206 is generated in response to a request with an If-Range
455          *   header field, the sender SHOULD NOT generate other representation
456          *   header fields beyond those required above, because the client is
457          *   understood to already have a prior response containing those header
458          *   fields.
459          */
460         http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES,
461                                    CONST_STR_LEN("Accept-Ranges"));
462       #endif
463     }
464 
465     return http_range_process(r, http_range);
466 }
467