xref: /lighttpd1.4/src/http_range.c (revision d958cf32)
1 /*
2  * http_range - HTTP Range (RFC 7233)
3  *
4  * Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
5  * License: BSD 3-clause (same as lighttpd)
6  */
7 #include "http_range.h"
8 
9 #include <limits.h>
10 #include <stdlib.h>   /* strtol(), strtoll() */
11 #include <string.h>   /* memmove() */
12 
13 #include "buffer.h"
14 #include "chunk.h"
15 #include "http_header.h"
16 #include "request.h"
17 
18 /* arbitrary limit for max num ranges (additional ranges are ignored) */
19 #undef  RMAX
20 #define RMAX 10
21 
22 /* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests
23  * https://tools.ietf.org/html/rfc7233
24  * Range requests are an OPTIONAL feature of HTTP, designed so that recipients
25  * not implementing this feature (or not supporting it for the target resource)
26  * can respond as if it is a normal GET request without impacting
27  * interoperability. */
28 
29 
30 __attribute_noinline__
31 static int
32 http_range_coalesce (off_t * const restrict ranges, int n)
33 {
34     /* coalesce/combine overlapping ranges and ranges separated by a
35      * gap which is smaller than the overhead of sending multiple parts
36      * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content)
37      * (ranges are known to be positive, so subtract 80 instead of add 80
38      *  to avoid any chance of integer overflow)
39      * (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set
40      *  of ranges has n^2 cost for this simplistic algorithm)
41      * (sorting the ranges and then combining would lower the cost, but the
42      *  cost should not be an issue since client should not send many ranges
43      *  and we restrict the max number of ranges to limit abuse)
44      * [RFC7233] 4.1 206 Partial Content recommends:
45      *   When a multipart response payload is generated, the server SHOULD send
46      *   the parts in the same order that the corresponding byte-range-spec
47      *   appeared in the received Range header field, excluding those ranges
48      *   that were deemed unsatisfiable or that were coalesced into other ranges
49      */
50     for (int i = 0; i+2 < n; i += 2) {
51         const off_t b = ranges[i];
52         const off_t e = ranges[i+1];
53         for (int j = i+2; j < n; j += 2) {
54             /* common case: ranges do not overlap */
55             if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80)
56                 continue;
57             /* else ranges do overlap, so combine into first range */
58             ranges[i]   = b <= ranges[j]   ? b : ranges[j];
59             ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1];
60             memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
61             /* restart outer loop from beginning */
62             n -= 2;
63             i = -2;
64             break;
65         }
66     }
67 
68     return n;
69 }
70 
71 
72 static const char *
73 http_range_parse_next (const char * restrict s, const off_t len,
74                        off_t * const restrict ranges)
75 {
76     /*(caller must check retured ranges[1] != -1, or else range was invalid)*/
77 
78     /*assert(len > 0);*//*(caller must ensure len > 0)*/
79     char *e;
80     off_t n = strtoll(s, &e, 10);
81     ranges[1] = -1; /* invalid */
82     if (n >= 0) {
83         if (n != LLONG_MAX && n < len && s != e) {
84             ranges[0] = n;
85             while (*e == ' ' || *e == '\t') ++e;
86             if (*e == '-') {
87                 n = strtoll((s = e+1), &e, 10);
88                 if (s == e || (n == 0 && e[-1] != '0'))
89                     ranges[1] = len-1;
90                 else if (ranges[0] <= n && n != LLONG_MAX)
91                     ranges[1] = n < len ? n : len-1;
92             }
93         }
94     }
95     else if (n != LLONG_MIN) {
96         ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/
97         ranges[1] = len-1;
98     }
99     while (*e == ' ' || *e == '\t') ++e;
100     return e;  /* ',' or '\0' or else invalid char in range request */
101 }
102 
103 
104 static int
105 http_range_parse (const char * restrict s, const off_t content_length,
106                   off_t ranges[RMAX*2])
107 {
108     /* [RFC7233] 2.1 Byte Ranges
109      * If a valid byte-range-set includes at least one byte-range-spec
110      * with a first-byte-pos that is less than the current length of
111      * the representation, or at least one suffix-byte-range-spec with
112      * a non-zero suffix-length, then the byte-range-set is satisfiable.
113      * Otherwise, the byte-range-set is unsatisfiable.
114      *
115      * [RFC7233] 3.1 Range
116      * A server that supports range requests MAY ignore or reject a Range
117      * header field that consists of more than two overlapping ranges, or a
118      * set of many small ranges that are not listed in ascending order,
119      * since both are indications of either a broken client or a deliberate
120      * denial-of-service attack (Section 6.1).  A client SHOULD NOT request
121      * multiple ranges that are inherently less efficient to process and
122      * transfer than a single range that encompasses the same data.
123      */
124     int n = 0;
125     do {
126         s = http_range_parse_next(s, content_length, ranges+n);
127         if ((*s == '\0' || *s == ',') && ranges[n+1] != -1)
128             n += 2;
129         else
130             while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/
131     } while (*s++ != '\0' && n < RMAX*2);
132           /*(*s++ for multipart, increment to char after ',')*/
133 
134     if (n <= 2)
135         return n;
136 
137     /* error if n == 0 (no valid ranges)
138      * (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/
139     return http_range_coalesce(ranges, n);
140 }
141 
142 
143 static void
144 http_range_single (request_st * const r, const off_t ranges[2])
145 {
146     /* caller already checked: n == 2, content_length > 0, ranges valid */
147     chunkqueue * const restrict cq = &r->write_queue;
148     const off_t complete_length = chunkqueue_length(cq);
149     /* add Content-Range header */
150     /*(large enough for "bytes X-X/X" with 3 huge numbers)*/
151     uint32_t len = sizeof("bytes ")-1;
152     char cr[72] = "bytes ";
153     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]);
154     cr[len++] = '-';
155     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]);
156     cr[len++] = '/';
157     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length);
158     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
159                              CONST_STR_LEN("Content-Range"), cr, len);
160     if (cq->first == cq->last) { /* single chunk in cq */
161         /* consume from cq to start of range, truncate after end of range */
162         if (ranges[0]) {
163             chunkqueue_mark_written(cq, ranges[0]);
164             cq->bytes_out -= ranges[0];
165             cq->bytes_in -= ranges[0];
166         }
167         cq->bytes_in -= complete_length - (ranges[1] + 1);
168         chunk * const c = cq->first;
169         if (c->type == FILE_CHUNK)
170             c->file.length = c->offset + ranges[1] - ranges[0] + 1;
171         else /*(c->type == MEM_CHUNK)*/
172             c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1;
173     }
174     else {
175         /* transfer contents to temporary cq, then transfer range back */
176         chunkqueue tq;
177         memset(&tq, 0, sizeof(tq));
178         chunkqueue_steal(&tq, cq, complete_length);
179         cq->bytes_out -= complete_length;
180         cq->bytes_in -= complete_length;
181         chunkqueue_mark_written(&tq, ranges[0]);
182         chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1);
183         chunkqueue_reset(&tq);
184     }
185 }
186 
187 
188 __attribute_cold__
189 static void
190 http_range_multi (request_st * const r,
191                   const off_t ranges[RMAX*2], const int n)
192 {
193     /* multiple ranges that are not ordered are not expected to be common,
194      * so those scenarios is not optimized here */
195     #define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3"
196     static const char boundary_prefix[] =
197       "\r\n--" HTTP_MULTIPART_BOUNDARY;
198     static const char boundary_end[] =
199       "\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n";
200     static const char multipart_type[] =
201       "multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY;
202 
203     buffer * const tb = r->tmp_buf;
204     buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix));
205     const buffer * const content_type =
206       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
207                                CONST_STR_LEN("Content-Type"));
208     if (content_type) {
209         buffer_append_str2(tb, CONST_STR_LEN("\r\nContent-Type: "),
210                                BUF_PTR_LEN(content_type));
211     }
212     buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes "));
213     const uint32_t prefix_len = buffer_clen(tb);
214 
215     http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
216                              CONST_STR_LEN("Content-Type"),
217                              CONST_STR_LEN(multipart_type));
218 
219     /* caller already checked: n > 2, content_length > 0, ranges valid */
220     chunkqueue * const restrict cq = &r->write_queue;
221     const off_t complete_length = chunkqueue_length(cq);
222 
223     /* copy chunks for ranges to end of cq, then consume original chunks
224      *
225      * future: if ranges ordered, could use technique in http_range_single(),
226      * but [RFC7233] 4.1 206 Partial Content recommends:
227      *   When a multipart response payload is generated, the server SHOULD send
228      *   the parts in the same order that the corresponding byte-range-spec
229      *   appeared in the received Range header field, excluding those ranges
230      *   that were deemed unsatisfiable or that were coalesced into other ranges
231      * and this code path is not expected to be hot, and so not optimized.
232      */
233 
234     chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK)
235       ? cq->first
236       : NULL;
237     for (int i = 0; i < n; i += 2) {
238         /* generate boundary-header including Content-Type and Content-Range */
239         buffer_truncate(tb, prefix_len);
240         buffer_append_int(tb, ranges[i]);
241         buffer_append_string_len(tb, CONST_STR_LEN("-"));
242         buffer_append_int(tb, ranges[i+1]);
243         buffer_append_string_len(tb, CONST_STR_LEN("/"));
244         buffer_append_int(tb, complete_length);
245         buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n"));
246         if (c) /* single MEM_CHUNK in original cq; not using mem_min */
247             chunkqueue_append_mem(cq, BUF_PTR_LEN(tb));
248         else
249             chunkqueue_append_mem_min(cq, BUF_PTR_LEN(tb));
250 
251         chunkqueue_append_cq_range(cq, cq, ranges[i],
252                                    ranges[i+1] - ranges[i] + 1);
253     }
254 
255     /* add boundary end */
256     chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end));
257 
258     /* remove initial chunk(s), since duplicated into multipart ranges */
259     /* remove initial "\r\n" in front of first boundary string */
260     chunkqueue_mark_written(cq, complete_length+2);
261     cq->bytes_out -= complete_length+2;
262     cq->bytes_in -= complete_length+2;
263 }
264 
265 
266 __attribute_cold__
267 static int
268 http_range_not_satisfiable (request_st * const r, const off_t content_length)
269 {
270     /*(large enough for "bytes '*'/X" with 1 huge number)*/
271     uint32_t len = sizeof("bytes */")-1;
272     char cr[32] = "bytes */";
273     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length);
274     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
275                              CONST_STR_LEN("Content-Range"), cr, len);
276     r->handler_module = NULL;
277     return (r->http_status = 416); /* Range Not Satisfiable */
278 }
279 
280 
281 __attribute_noinline__
282 static int
283 http_range_process (request_st * const r, const buffer * const http_range)
284 {
285     const off_t content_length = chunkqueue_length(&r->write_queue);
286     if (0 == content_length) /*(implementation detail; see comment at top)*/
287         return r->http_status;  /* skip Range handling if empty payload */
288     /* future: might skip Range if content_length is below threshold, e.g. 1k */
289 
290     /* An origin server MUST ignore a Range header field that contains a
291      * range unit it does not understand. */
292     if (buffer_clen(http_range) < sizeof("bytes=")-1
293         || !buffer_eq_icase_ssn(http_range->ptr, "bytes=", sizeof("bytes=")-1))
294         return r->http_status;         /* 200 OK */
295 
296     /* arbitrary limit: support up to RMAX ranges in request Range field
297      * (validating and coalescing overlapping ranges is not a linear algorithm)
298      * (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */
299     off_t ranges[RMAX*2];
300     const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1,
301                                    content_length, ranges);
302 
303     /* checked above: content_length > 0, ranges valid */
304     if (2 == n) /* single range */
305         http_range_single(r, ranges);
306     else if (0 == n)                   /* 416 Range Not Satisfiable */
307         return http_range_not_satisfiable(r, content_length);
308     else        /* multipart ranges */
309         http_range_multi(r, ranges, n);
310 
311     /*(must either set Content-Length or unset prior value, if any)*/
312     buffer_append_int(
313       http_header_response_set_ptr(r, HTTP_HEADER_CONTENT_LENGTH,
314                                    CONST_STR_LEN("Content-Length")),
315       chunkqueue_length(&r->write_queue));
316 
317     return (r->http_status = 206);     /* 206 Partial Content */
318 }
319 
320 
321 int
322 http_range_rfc7233 (request_st * const r)
323 {
324     const int http_status = r->http_status;
325 
326     /* implementation limitation:
327      * limit range handling to when we have complete response
328      * (future: might extend this to streamed files if Content-Length known)
329      * (otherwise, might be unable to validate Range before send response header
330      *  e.g. unable to handle suffix-byte-range-spec without entity length)*/
331     if (!r->resp_body_finished)
332         return http_status;
333 
334     /* limit range handling to 200 responses
335      * [RFC7233] 3.1 Range
336      *   The Range header field is evaluated after evaluating the precondition
337      *   header fields defined in [RFC7232], and only if the result in absence
338      *   of the Range header field would be a 200 (OK) response. */
339     if (200 != http_status)
340         return http_status;
341     /* limit range handling to GET and HEAD (further limited below to GET)
342      * [RFC7233] 3.1 Range
343      *   A server MUST ignore a Range header field received with a request
344      *   method other than GET.
345      */
346     if (!http_method_get_or_head(r->http_method))
347         return http_status;
348     /* no "Range" in HTTP/1.0 */
349     if (r->http_version < HTTP_VERSION_1_1)
350         return http_status;
351     /* do not attempt to handle range if Transfer-Encoding already applied.
352      * skip Range processing if Content-Encoding has already been applied,
353      * since Range is on the unencoded content length and Content-Encoding
354      * might change content.  This includes if mod_deflate has applied
355      * Content-Encoding.  If Transfer-Encoding: gzip, chunked were used
356      * instead (not done), then Range processing could safely take place, too,
357      * (but mod_deflate would need to run from new hook to handle TE: gzip).
358      * Alternatively, lighttpd.conf could be configured to disable mod_deflate
359      * for Range requests:
360      *   $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () }
361      */
362     if ((r->resp_htags
363          & (light_bshift(HTTP_HEADER_TRANSFER_ENCODING)
364            |light_bshift(HTTP_HEADER_CONTENT_ENCODING))))
365         return http_status;
366   #if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/
367     /* check if Range request already handled (single range or multipart)*/
368     if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE))
369         return http_status;
370     const buffer * const content_type =
371       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
372                                CONST_STR_LEN("Content-Type"));
373     if (content_type
374         && buffer_clen(content_type) >= sizeof("multipart/byteranges")-1
375         && buffer_eq_icase_ssn(content_type->ptr, "multipart/byteranges",
376                                sizeof("multipart/byteranges")-1))
377         return http_status;
378   #endif
379 
380     /* optional: advertise Accept-Ranges: bytes
381      * Even if "Accept-Ranges: bytes" is not given,
382      * [RFC7233] 2.3 Accept-Ranges
383      *   A client MAY generate range requests without having received this
384      *   header for the resource involved.
385      */
386     if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES))
387         http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES,
388                                  CONST_STR_LEN("Accept-Ranges"),
389                                  CONST_STR_LEN("bytes"));
390     else {
391         const buffer * const accept_ranges =
392           http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES,
393                                    CONST_STR_LEN("Accept-Ranges"));
394       #ifdef __COVERITY__
395         force_assert(accept_ranges); /*(r->resp_htags checked above)*/
396       #endif
397         if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none")))
398             return http_status;
399     }
400 
401     /* limit range handling to GET
402      * [RFC7233] 3.1 Range
403      *   A server MUST ignore a Range header field received with a request
404      *   method other than GET.
405      */
406     if (r->http_method != HTTP_METHOD_GET)
407         return http_status;
408 
409     /* check for Range request */
410     const buffer * const http_range =
411       http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range"));
412     if (!http_range)
413         return http_status;
414 
415     /* [RFC7233] 3.2 If-Range
416      *   If-Range = entity-tag / HTTP-date
417      *   A client MUST NOT generate an If-Range header field containing an
418      *   entity-tag that is marked as weak.
419      *   [...]
420      *   Note that this comparison by exact match, including when the validator
421      *   is an HTTP-date, differs from the "earlier than or equal to" comparison
422      *   used when evaluating an If-Unmodified-Since conditional. */
423     if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) {
424         const buffer * const if_range =
425           http_header_request_get(r, HTTP_HEADER_IF_RANGE,
426                                   CONST_STR_LEN("If-Range"));
427       #ifdef __COVERITY__
428         force_assert(if_range); /*(r->rqst_htags checked above)*/
429       #endif
430         /* (weak ETag W/"<etag>" will not match Last-Modified) */
431         const buffer * const cmp = (if_range->ptr[0] == '"')
432           ? http_header_response_get(r, HTTP_HEADER_ETAG,
433                                      CONST_STR_LEN("ETag"))
434           : http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED,
435                                      CONST_STR_LEN("Last-Modified"));
436         if (!cmp || !buffer_is_equal(if_range, cmp))
437             return http_status;
438 
439       #if 0 /*(questionable utility; not deemed worthwhile)*/
440         /* (In a modular server, the following RFC recommentation might be
441          *  expensive and invasive to implement perfectly, so only making an
442          *  effort here to comply with known headers added within this routine
443          *  and within the purview of Range requests)
444          * [RFC7233] 4.1 206 Partial Content
445          *   If a 206 is generated in response to a request with an If-Range
446          *   header field, the sender SHOULD NOT generate other representation
447          *   header fields beyond those required above, because the client is
448          *   understood to already have a prior response containing those header
449          *   fields.
450          */
451         http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES,
452                                    CONST_STR_LEN("Accept-Ranges"));
453       #endif
454     }
455 
456     return http_range_process(r, http_range);
457 }
458