xref: /lighttpd1.4/src/http_range.c (revision dc01487e)
1 /*
2  * http_range - HTTP Range (RFC 7233)
3  *
4  * Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
5  * License: BSD 3-clause (same as lighttpd)
6  */
7 #include "http_range.h"
8 
9 #include <limits.h>
10 #include <stdlib.h>   /* strtol(), strtoll() */
11 #include <string.h>   /* memmove() */
12 #include <strings.h>  /* strcasecmp(), strncasecmp() */
13 
14 #include "buffer.h"
15 #include "chunk.h"
16 #include "http_header.h"
17 #include "request.h"
18 
19 /* arbitrary limit for max num ranges (additional ranges are ignored) */
20 #undef  RMAX
21 #define RMAX 10
22 
23 /* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests
24  * https://tools.ietf.org/html/rfc7233
25  * Range requests are an OPTIONAL feature of HTTP, designed so that recipients
26  * not implementing this feature (or not supporting it for the target resource)
27  * can respond as if it is a normal GET request without impacting
28  * interoperability. */
29 
30 
31 __attribute_noinline__
32 static int
33 http_range_coalesce (off_t * const restrict ranges, int n)
34 {
35     /* coalesce/combine overlapping ranges and ranges separated by a
36      * gap which is smaller than the overhead of sending multiple parts
37      * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content)
38      * (ranges are known to be positive, so subtract 80 instead of add 80
39      *  to avoid any chance of integer overflow)
40      * (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set
41      *  of ranges has n^2 cost for this simplistic algorithm)
42      * (sorting the ranges and then combining would lower the cost, but the
43      *  cost should not be an issue since client should not send many ranges
44      *  and we restrict the max number of ranges to limit abuse)
45      * [RFC7233] 4.1 206 Partial Content recommends:
46      *   When a multipart response payload is generated, the server SHOULD send
47      *   the parts in the same order that the corresponding byte-range-spec
48      *   appeared in the received Range header field, excluding those ranges
49      *   that were deemed unsatisfiable or that were coalesced into other ranges
50      */
51     for (int i = 0; i+2 < n; i += 2) {
52         const off_t b = ranges[i];
53         const off_t e = ranges[i+1];
54         for (int j = i+2; j < n; j += 2) {
55             /* common case: ranges do not overlap */
56             if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80)
57                 continue;
58             /* else ranges do overlap, so combine into first range */
59             ranges[i]   = b <= ranges[j]   ? b : ranges[j];
60             ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1];
61             memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
62             /* restart outer loop from beginning */
63             n -= 2;
64             i = -2;
65             break;
66         }
67     }
68 
69     return n;
70 }
71 
72 
73 static const char *
74 http_range_parse_next (const char * restrict s, const off_t len,
75                        off_t * const restrict ranges)
76 {
77     /*(caller must check retured ranges[1] != -1, or else range was invalid)*/
78 
79     /*assert(len > 0);*//*(caller must ensure len > 0)*/
80     char *e;
81     off_t n = strtoll(s, &e, 10);
82     ranges[1] = -1; /* invalid */
83     if (n >= 0) {
84         if (n != LLONG_MAX && n < len && s != e) {
85             ranges[0] = n;
86             while (*e == ' ' || *e == '\t') ++e;
87             if (*e == '-') {
88                 n = strtoll((s = e+1), &e, 10);
89                 if (s == e || (n == 0 && e[-1] != '0'))
90                     ranges[1] = len-1;
91                 else if (ranges[0] <= n && n != LLONG_MAX)
92                     ranges[1] = n < len ? n : len-1;
93             }
94         }
95     }
96     else if (n != LLONG_MIN) {
97         ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/
98         ranges[1] = len-1;
99     }
100     while (*e == ' ' || *e == '\t') ++e;
101     return e;  /* ',' or '\0' or else invalid char in range request */
102 }
103 
104 
105 static int
106 http_range_parse (const char * restrict s, const off_t content_length,
107                   off_t ranges[RMAX*2])
108 {
109     /* [RFC7233] 2.1 Byte Ranges
110      * If a valid byte-range-set includes at least one byte-range-spec
111      * with a first-byte-pos that is less than the current length of
112      * the representation, or at least one suffix-byte-range-spec with
113      * a non-zero suffix-length, then the byte-range-set is satisfiable.
114      * Otherwise, the byte-range-set is unsatisfiable.
115      *
116      * [RFC7233] 3.1 Range
117      * A server that supports range requests MAY ignore or reject a Range
118      * header field that consists of more than two overlapping ranges, or a
119      * set of many small ranges that are not listed in ascending order,
120      * since both are indications of either a broken client or a deliberate
121      * denial-of-service attack (Section 6.1).  A client SHOULD NOT request
122      * multiple ranges that are inherently less efficient to process and
123      * transfer than a single range that encompasses the same data.
124      */
125     int n = 0;
126     do {
127         s = http_range_parse_next(s, content_length, ranges+n);
128         if ((*s == '\0' || *s == ',') && ranges[n+1] != -1)
129             n += 2;
130         else
131             while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/
132     } while (*s++ != '\0' && n < RMAX*2);
133           /*(*s++ for multipart, increment to char after ',')*/
134 
135     if (n <= 2)
136         return n;
137 
138     /* error if n == 0 (no valid ranges)
139      * (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/
140     return http_range_coalesce(ranges, n);
141 }
142 
143 
144 static void
145 http_range_single (request_st * const r, const off_t ranges[2])
146 {
147     /* caller already checked: n == 2, content_length > 0, ranges valid */
148     chunkqueue * const restrict cq = &r->write_queue;
149     const off_t complete_length = chunkqueue_length(cq);
150     /* add Content-Range header */
151     /*(large enough for "bytes X-X/X" with 3 huge numbers)*/
152     uint32_t len = sizeof("bytes ")-1;
153     char cr[72] = "bytes ";
154     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]);
155     cr[len++] = '-';
156     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]);
157     cr[len++] = '/';
158     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length);
159     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
160                              CONST_STR_LEN("Content-Range"), cr, len);
161     if (cq->first == cq->last) { /* single chunk in cq */
162         /* consume from cq to start of range, truncate after end of range */
163         if (ranges[0]) {
164             chunkqueue_mark_written(cq, ranges[0]);
165             cq->bytes_out -= ranges[0];
166             cq->bytes_in -= ranges[0];
167         }
168         cq->bytes_in -= complete_length - (ranges[1] + 1);
169         chunk * const c = cq->first;
170         if (c->type == FILE_CHUNK)
171             c->file.length = c->offset + ranges[1] - ranges[0] + 1;
172         else /*(c->type == MEM_CHUNK)*/
173             c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1;
174     }
175     else {
176         /* transfer contents to temporary cq, then transfer range back */
177         chunkqueue tq;
178         memset(&tq, 0, sizeof(tq));
179         chunkqueue_steal(&tq, cq, complete_length);
180         cq->bytes_out -= complete_length;
181         cq->bytes_in -= complete_length;
182         chunkqueue_mark_written(&tq, ranges[0]);
183         chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1);
184         chunkqueue_reset(&tq);
185     }
186 }
187 
188 
189 __attribute_cold__
190 static void
191 http_range_multi (request_st * const r,
192                   const off_t ranges[RMAX*2], const int n)
193 {
194     /* multiple ranges that are not ordered are not expected to be common,
195      * so those scenarios is not optimized here */
196     #define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3"
197     static const char boundary_prefix[] =
198       "\r\n--" HTTP_MULTIPART_BOUNDARY;
199     static const char boundary_end[] =
200       "\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n";
201     static const char multipart_type[] =
202       "multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY;
203 
204     buffer * const tb = r->tmp_buf;
205     buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix));
206     const buffer * const content_type =
207       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
208                                CONST_STR_LEN("Content-Type"));
209     if (content_type) {
210         buffer_append_str2(tb, CONST_STR_LEN("\r\nContent-Type: "),
211                                CONST_BUF_LEN(content_type));
212     }
213     buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes "));
214     const uint32_t prefix_len = buffer_string_length(tb);
215 
216     http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
217                              CONST_STR_LEN("Content-Type"),
218                              CONST_STR_LEN(multipart_type));
219 
220     /* caller already checked: n > 2, content_length > 0, ranges valid */
221     chunkqueue * const restrict cq = &r->write_queue;
222     const off_t complete_length = chunkqueue_length(cq);
223 
224     /* copy chunks for ranges to end of cq, then consume original chunks
225      *
226      * future: if ranges ordered, could use technique in http_range_single(),
227      * but [RFC7233] 4.1 206 Partial Content recommends:
228      *   When a multipart response payload is generated, the server SHOULD send
229      *   the parts in the same order that the corresponding byte-range-spec
230      *   appeared in the received Range header field, excluding those ranges
231      *   that were deemed unsatisfiable or that were coalesced into other ranges
232      * and this code path is not expected to be hot, and so not optimized.
233      */
234 
235     chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK)
236       ? cq->first
237       : NULL;
238     for (int i = 0; i < n; i += 2) {
239         /* generate boundary-header including Content-Type and Content-Range */
240         buffer_string_set_length(tb, prefix_len);
241         buffer_append_int(tb, ranges[i]);
242         buffer_append_string_len(tb, CONST_STR_LEN("-"));
243         buffer_append_int(tb, ranges[i+1]);
244         buffer_append_string_len(tb, CONST_STR_LEN("/"));
245         buffer_append_int(tb, complete_length);
246         buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n"));
247         if (c) /* single MEM_CHUNK in original cq; not using mem_min */
248             chunkqueue_append_mem(cq, CONST_BUF_LEN(tb));
249         else
250             chunkqueue_append_mem_min(cq, CONST_BUF_LEN(tb));
251 
252         chunkqueue_append_cq_range(cq, cq, ranges[i],
253                                    ranges[i+1] - ranges[i] + 1);
254     }
255 
256     /* add boundary end */
257     chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end));
258 
259     /* remove initial chunk(s), since duplicated into multipart ranges */
260     /* remove initial "\r\n" in front of first boundary string */
261     chunkqueue_mark_written(cq, complete_length+2);
262     cq->bytes_out -= complete_length+2;
263     cq->bytes_in -= complete_length+2;
264 }
265 
266 
267 __attribute_cold__
268 static int
269 http_range_not_satisfiable (request_st * const r, const off_t content_length)
270 {
271     /*(large enough for "bytes '*'/X" with 1 huge number)*/
272     uint32_t len = sizeof("bytes */")-1;
273     char cr[32] = "bytes */";
274     len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length);
275     http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
276                              CONST_STR_LEN("Content-Range"), cr, len);
277     r->handler_module = NULL;
278     return (r->http_status = 416); /* Range Not Satisfiable */
279 }
280 
281 
282 __attribute_noinline__
283 static int
284 http_range_process (request_st * const r, const buffer * const http_range)
285 {
286     const off_t content_length = chunkqueue_length(&r->write_queue);
287     if (0 == content_length) /*(implementation detail; see comment at top)*/
288         return r->http_status;  /* skip Range handling if empty payload */
289     /* future: might skip Range if content_length is below threshold, e.g. 1k */
290 
291     /* An origin server MUST ignore a Range header field that contains a
292      * range unit it does not understand. */
293     if (0 != strncasecmp(http_range->ptr, "bytes=", sizeof("bytes=")-1))
294         return r->http_status;         /* 200 OK */
295 
296     /* arbitrary limit: support up to RMAX ranges in request Range field
297      * (validating and coalescing overlapping ranges is not a linear algorithm)
298      * (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */
299     off_t ranges[RMAX*2];
300     const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1,
301                                    content_length, ranges);
302 
303     /* checked above: content_length > 0, ranges valid */
304     if (2 == n) /* single range */
305         http_range_single(r, ranges);
306     else if (0 == n)                   /* 416 Range Not Satisfiable */
307         return http_range_not_satisfiable(r, content_length);
308     else        /* multipart ranges */
309         http_range_multi(r, ranges, n);
310 
311     /*(must either set Content-Length or unset prior value, if any)*/
312     buffer_append_int(
313       http_header_response_set_ptr(r, HTTP_HEADER_CONTENT_LENGTH,
314                                    CONST_STR_LEN("Content-Length")),
315       chunkqueue_length(&r->write_queue));
316 
317     return (r->http_status = 206);     /* 206 Partial Content */
318 }
319 
320 
321 int
322 http_range_rfc7233 (request_st * const r)
323 {
324     const int http_status = r->http_status;
325 
326     /* implementation limitation:
327      * limit range handling to when we have complete response
328      * (future: might extend this to streamed files if Content-Length known)
329      * (otherwise, might be unable to validate Range before send response header
330      *  e.g. unable to handle suffix-byte-range-spec without entity length)*/
331     if (!r->resp_body_finished)
332         return http_status;
333 
334     /* limit range handling to 200 responses
335      * [RFC7233] 3.1 Range
336      *   The Range header field is evaluated after evaluating the precondition
337      *   header fields defined in [RFC7232], and only if the result in absence
338      *   of the Range header field would be a 200 (OK) response. */
339     if (200 != http_status)
340         return http_status;
341     /* limit range handling to GET and HEAD (further limited below to GET)
342      * [RFC7233] 3.1 Range
343      *   A server MUST ignore a Range header field received with a request
344      *   method other than GET.
345      */
346     if (!http_method_get_or_head(r->http_method))
347         return http_status;
348     /* no "Range" in HTTP/1.0 */
349     if (r->http_version < HTTP_VERSION_1_1)
350         return http_status;
351     /* do not attempt to handle range if Transfer-Encoding already applied.
352      * skip Range processing if Content-Encoding has already been applied,
353      * since Range is on the unencoded content length and Content-Encoding
354      * might change content.  This includes if mod_deflate has applied
355      * Content-Encoding.  If Transfer-Encoding: gzip, chunked were used
356      * instead (not done), then Range processing could safely take place, too,
357      * (but mod_deflate would need to run from new hook to handle TE: gzip).
358      * Alternatively, lighttpd.conf could be configured to disable mod_deflate
359      * for Range requests:
360      *   $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () }
361      */
362     if ((r->resp_htags
363          & (light_bshift(HTTP_HEADER_TRANSFER_ENCODING)
364            |light_bshift(HTTP_HEADER_CONTENT_ENCODING))))
365         return http_status;
366   #if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/
367     /* check if Range request already handled (single range or multipart)*/
368     if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE))
369         return http_status;
370     const buffer * const content_type =
371       http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
372                                CONST_STR_LEN("Content-Type"));
373     if (content_type
374         && 0 == strncasecmp(content_type->ptr,
375                             "multipart/byteranges",
376                             sizeof("multipart/byteranges")-1))
377         return http_status;
378   #endif
379 
380     /* optional: advertise Accept-Ranges: bytes
381      * Even if "Accept-Ranges: bytes" is not given,
382      * [RFC7233] 2.3 Accept-Ranges
383      *   A client MAY generate range requests without having received this
384      *   header for the resource involved.
385      */
386     if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES))
387         http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES,
388                                  CONST_STR_LEN("Accept-Ranges"),
389                                  CONST_STR_LEN("bytes"));
390     else {
391         const buffer * const accept_ranges =
392           http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES,
393                                    CONST_STR_LEN("Accept-Ranges"));
394       #ifdef __COVERITY__
395         force_assert(accept_ranges); /*(r->resp_htags checked above)*/
396       #endif
397         if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none")))
398             return http_status;
399     }
400 
401     /* limit range handling to GET
402      * [RFC7233] 3.1 Range
403      *   A server MUST ignore a Range header field received with a request
404      *   method other than GET.
405      */
406     if (r->http_method != HTTP_METHOD_GET)
407         return http_status;
408 
409     /* check for Range request */
410     const buffer * const http_range =
411       http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range"));
412     if (!http_range)
413         return http_status;
414 
415     /* [RFC7233] 3.2 If-Range
416      *   If-Range = entity-tag / HTTP-date
417      *   A client MUST NOT generate an If-Range header field containing an
418      *   entity-tag that is marked as weak.
419      *   [...]
420      *   Note that this comparison by exact match, including when the validator
421      *   is an HTTP-date, differs from the "earlier than or equal to" comparison
422      *   used when evaluating an If-Unmodified-Since conditional. */
423     if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) {
424         const buffer * const if_range =
425           http_header_request_get(r, HTTP_HEADER_IF_RANGE,
426                                   CONST_STR_LEN("If-Range"));
427       #ifdef __COVERITY__
428         force_assert(if_range); /*(r->rqst_htags checked above)*/
429       #endif
430         /* (weak ETag W/"<etag>" will not match Last-Modified) */
431         const buffer * const cmp = (if_range->ptr[0] == '"')
432           ? http_header_response_get(r, HTTP_HEADER_ETAG,
433                                      CONST_STR_LEN("ETag"))
434           : http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED,
435                                      CONST_STR_LEN("Last-Modified"));
436         if (!cmp || !buffer_is_equal(if_range, cmp))
437             return http_status;
438 
439       #if 0 /*(questionable utility; not deemed worthwhile)*/
440         /* (In a modular server, the following RFC recommentation might be
441          *  expensive and invasive to implement perfectly, so only making an
442          *  effort here to comply with known headers added within this routine
443          *  and within the purview of Range requests)
444          * [RFC7233] 4.1 206 Partial Content
445          *   If a 206 is generated in response to a request with an If-Range
446          *   header field, the sender SHOULD NOT generate other representation
447          *   header fields beyond those required above, because the client is
448          *   understood to already have a prior response containing those header
449          *   fields.
450          */
451         http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES,
452                                    CONST_STR_LEN("Accept-Ranges"));
453       #endif
454     }
455 
456     return http_range_process(r, http_range);
457 }
458