xref: /lighttpd1.4/src/chunk.c (revision da1c7ad2)
#include "first.h"

/**
 * the network chunk-API
 *
 */

#include "chunk.h"
#include "fdevent.h"
#include "log.h"

#include <sys/types.h>
#include <sys/stat.h>
#include "sys-mmap.h"
#include "sys-setjmp.h"

#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#include <errno.h>
#include <string.h>


#ifdef HAVE_MMAP

#define MMAP_CHUNK_SIZE (512*1024)

__attribute_cold__
/*__attribute_noinline__*/
static off_t
mmap_pagemask (void)
{
  #ifndef _WIN32
    long pagesize = sysconf(_SC_PAGESIZE);
  #else
    long pagesize = -1; /*(not implemented (yet))*/
  #endif
    if (-1 == pagesize) pagesize = 4096;
    force_assert(pagesize < MMAP_CHUNK_SIZE);
    return ~((off_t)pagesize - 1); /* pagesize always power-of-2 */
}
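/* Worked example (illustrative, not in the original source): with a 4096-byte
 * page, the mask is ~((off_t)4096 - 1) == ~0xFFF, so (offset & mask) rounds an
 * offset down to a page boundary, e.g. (10000 & ~0xFFF) == 8192. */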

#if 0
static off_t
mmap_align_offset (off_t start)
{
    static off_t pagemask = 0;
    if (0 == pagemask)
        pagemask = mmap_pagemask();
    return (start & pagemask);
}
#endif

#define mmap_align_offset(offset) ((offset) & chunk_pagemask)
static off_t chunk_pagemask = 0;
static int chunk_mmap_flags = MAP_SHARED;

#endif /* HAVE_MMAP */


/* default 1 MB */
#define DEFAULT_TEMPFILE_SIZE (1 * 1024 * 1024)

static size_t chunk_buf_sz = 8192;
static chunk *chunks, *chunks_oversized, *chunks_filechunk;
static chunk *chunk_buffers;
static int chunks_oversized_n;
static const array *chunkqueue_default_tempdirs = NULL;
static off_t chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;

void chunkqueue_set_chunk_size (size_t sz)
{
    size_t x = 1024;
    while (x < sz && x < (1u << 30)) x <<= 1;
    chunk_buf_sz = sz > 0 ? x : 8192;
}
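/* Example (illustrative, not in the original source):
 * chunkqueue_set_chunk_size(10000) rounds up to the next power of two, so
 * chunk_buf_sz becomes 16384; chunkqueue_set_chunk_size(0) restores the
 * 8192 default. */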

void chunkqueue_set_tempdirs_default_reset (void)
{
    chunk_buf_sz = 8192;
    chunkqueue_default_tempdirs = NULL;
    chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;

  #ifdef HAVE_MMAP /*(extend this func to initialize statics at startup)*/
    if (0 == chunk_pagemask)
        chunk_pagemask = mmap_pagemask();
    chunk_mmap_flags = MAP_SHARED;
  #endif
}

chunkqueue *chunkqueue_init(chunkqueue *cq) {
	/* (if caller passes non-NULL cq, it must be 0-init) */
	if (NULL == cq)
		cq = ck_calloc(1, sizeof(*cq));

      #if 0 /*(zeroed by calloc())*/
	cq->first = NULL;
	cq->last = NULL;
      #endif

	cq->tempdirs              = chunkqueue_default_tempdirs;
	cq->upload_temp_file_size = chunkqueue_default_tempfile_size;

	return cq;
}

__attribute_returns_nonnull__
static chunk *chunk_init(void) {
	chunk * const restrict c = ck_calloc(1, sizeof(*c));

      #if 0 /*(zeroed by calloc())*/
	c->type = MEM_CHUNK;
	c->next = NULL;
	c->offset = 0;
	c->file.length = 0;
	c->file.is_temp = 0;
	c->file.view = NULL;
      #endif
	c->file.fd = -1;

	c->mem = buffer_init();
	return c;
}

__attribute_noinline__
__attribute_returns_nonnull__
static chunk *chunk_init_sz(size_t sz) {
	chunk * const restrict c = chunk_init();
	buffer_string_prepare_copy(c->mem, sz-1);
	return c;
}

#ifdef HAVE_MMAP

__attribute_malloc__
__attribute_returns_nonnull__
static void * chunk_file_view_init (void) {
    chunk_file_view * const restrict cfv = ck_calloc(1, sizeof(*cfv));
    cfv->mptr = MAP_FAILED;
  #if 0 /*(zeroed by calloc())*/
    cfv->mlen = 0;
    cfv->foff = 0;
  #endif
    cfv->refcnt = 1;
    return cfv;
}

__attribute_nonnull__()
static chunk_file_view * chunk_file_view_release (chunk_file_view *cfv) {
    if (0 == --cfv->refcnt) {
        if (MAP_FAILED != cfv->mptr)
            munmap(cfv->mptr, (size_t)cfv->mlen);
        free(cfv);
    }
    return NULL;
}

__attribute_cold__
__attribute_noinline__
__attribute_nonnull__()
static chunk_file_view * chunk_file_view_failed (chunk_file_view *cfv) {
    return chunk_file_view_release(cfv);
}

#endif /* HAVE_MMAP */

ssize_t
chunk_file_pread (int fd, void *buf, size_t count, off_t offset)
{
    /*(expects open file for FILE_CHUNK)*/
  #ifndef HAVE_PREAD
    /*(On systems without pread() or equivalent, lseek() is repeated if this
     * func is called in a loop, but this func is generally used on small files,
     * or reading a small bit at a time.  Even in the case of mod_deflate, files
     * are not expected to be excessively large.) */
    if (-1 == lseek(fd, offset, SEEK_SET))
        return -1;
  #endif

    ssize_t rd;
    do {
      #ifdef HAVE_PREAD
        rd = pread(fd, buf, count, offset);
      #else
        rd = read(fd, buf, count);
      #endif
    } while (-1 == rd && errno == EINTR);
    return rd;
}
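/* Usage sketch (illustrative, not in the original source): read a slice of an
 * open FILE_CHUNK fd, with EINTR retried internally:
 *   char buf[4096];
 *   ssize_t rd = chunk_file_pread(c->file.fd, buf, sizeof(buf), c->offset);
 *   if (rd < 0) (handle errno)
 */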

static void chunk_reset_file_chunk(chunk *c) {
	if (c->file.is_temp) {
		c->file.is_temp = 0;
		if (!buffer_is_blank(c->mem))
			unlink(c->mem->ptr);
	}
	if (c->file.refchg) {
		c->file.refchg(c->file.ref, -1);
		c->file.refchg = 0; /* NULL fn ptr */
		c->file.ref = NULL;
	}
	else if (c->file.fd != -1) {
		close(c->file.fd);
	}
  #ifdef HAVE_MMAP
	if (c->file.view)
		c->file.view = chunk_file_view_release(c->file.view);
  #endif
	c->file.fd = -1;
	c->file.length = 0;
	c->type = MEM_CHUNK;
}

static void chunk_reset(chunk *c) {
	if (c->type == FILE_CHUNK) chunk_reset_file_chunk(c);

	buffer_clear(c->mem);
	c->offset = 0;
}

static void chunk_free(chunk *c) {
	if (c->type == FILE_CHUNK) chunk_reset_file_chunk(c);
	buffer_free(c->mem);
	free(c);
}

static chunk * chunk_pop_oversized(size_t sz) {
    /* future: might have buckets of certain sizes, up to socket buf sizes */
    if (chunks_oversized && chunks_oversized->mem->size >= sz) {
        --chunks_oversized_n;
        chunk *c = chunks_oversized;
        chunks_oversized = c->next;
        return c;
    }
    return NULL;
}

static void chunk_push_oversized(chunk * const c, const size_t sz) {
    /* XXX: chunk_buffer_yield() may have removed need for list size limit */
    if (chunks_oversized_n < 64 && chunk_buf_sz >= 4096) {
        ++chunks_oversized_n;
        chunk **co = &chunks_oversized;
        while (*co && sz < (*co)->mem->size) co = &(*co)->next;
        c->next = *co;
        *co = c;
    }
    else {
        buffer * const tb = chunks_oversized ? chunks_oversized->mem : NULL;
        if (tb && tb->size < sz) {
            /* swap larger mem block onto head of list; free smaller mem */
            chunks_oversized->mem = c->mem;
            c->mem = tb;
        }
        chunk_free(c);
    }
}

__attribute_noinline__
__attribute_returns_nonnull__
static buffer * chunk_buffer_acquire_sz(const size_t sz) {
    chunk *c;
    buffer *b;
    if (sz <= (chunk_buf_sz|1)) {
        if (chunks) {
            c = chunks;
            chunks = c->next;
        }
        else
            c = chunk_init_sz(chunk_buf_sz);
    }
    else {
        c = chunk_pop_oversized(sz);
        if (NULL == c) {
            /*(round up to nearest chunk_buf_sz)*/
            /* NB: round down power-2 + 1 to avoid excess allocation
             * (sz & ~1uL) relies on buffer_realloc() adding +1 *and* on callers
             * of this func never passing power-2 + 1 sz unless direct caller
             * adds +1 for '\0', as is done in chunk_buffer_prepare_append() */
            c = chunk_init_sz(((sz&~1uL)+(chunk_buf_sz-1)) & ~(chunk_buf_sz-1));
        }
    }
    c->next = chunk_buffers;
    chunk_buffers = c;
    b = c->mem;
    c->mem = NULL;
    return b;
}

buffer * chunk_buffer_acquire(void) {
    return chunk_buffer_acquire_sz(chunk_buf_sz);
}

void chunk_buffer_release(buffer *b) {
    if (NULL == b) return;
    if (chunk_buffers) {
        chunk *c = chunk_buffers;
        chunk_buffers = c->next;
        c->mem = b;
        buffer_clear(b);
        if (b->size == (chunk_buf_sz|1)) {
            c->next = chunks;
            chunks = c;
        }
        else if (b->size > chunk_buf_sz)
            chunk_push_oversized(c, b->size);
        else
            chunk_free(c);
    }
    else {
        buffer_free(b);
    }
}
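/* Usage sketch (illustrative, not in the original source): borrow a scratch
 * buffer from the chunk buffer pool and return it when finished, avoiding a
 * malloc()/free() pair:
 *   buffer * const tb = chunk_buffer_acquire();
 *   (use tb as temporary workspace)
 *   chunk_buffer_release(tb);
 */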

void chunk_buffer_yield(buffer *b) {
    if (b->size == (chunk_buf_sz|1)) return;

    buffer * const cb = chunk_buffer_acquire_sz(chunk_buf_sz);
    buffer tb = *b;
    *b = *cb;
    *cb = tb;
    chunk_buffer_release(cb);
}

size_t chunk_buffer_prepare_append(buffer * const b, size_t sz) {
    if (sz > buffer_string_space(b)) {
        sz += b->used ? b->used : 1;
        buffer * const cb = chunk_buffer_acquire_sz(sz);
        /* swap buffer contents and copy original b->ptr into larger b->ptr */
        /*(this does more than buffer_move())*/
        buffer tb = *b;
        *b = *cb;
        *cb = tb;
        if ((b->used = tb.used))
            memcpy(b->ptr, tb.ptr, tb.used);
        chunk_buffer_release(cb);
    }
    return buffer_string_space(b);
}

__attribute_noinline__
__attribute_returns_nonnull__
static chunk * chunk_acquire(size_t sz) {
    if (sz <= (chunk_buf_sz|1)) {
        if (chunks) {
            chunk *c = chunks;
            chunks = c->next;
            return c;
        }
        sz = chunk_buf_sz;
    }
    else {
        /*(round up to nearest chunk_buf_sz)*/
        sz = (sz + (chunk_buf_sz-1)) & ~(chunk_buf_sz-1);
        chunk *c = chunk_pop_oversized(sz);
        if (c) return c;
    }

    return chunk_init_sz(sz);
}

static void chunk_release(chunk *c) {
    const size_t sz = c->mem->size;
    if (sz == (chunk_buf_sz|1)) {
        chunk_reset(c);
        c->next = chunks;
        chunks = c;
    }
    else if (sz > chunk_buf_sz) {
        chunk_reset(c);
        chunk_push_oversized(c, sz);
    }
    else if (c->type == FILE_CHUNK) {
        chunk_reset(c);
        c->next = chunks_filechunk;
        chunks_filechunk = c;
    }
    else {
        chunk_free(c);
    }
}

__attribute_returns_nonnull__
static chunk * chunk_acquire_filechunk(void) {
    if (chunks_filechunk) {
        chunk *c = chunks_filechunk;
        chunks_filechunk = c->next;
        return c;
    }
    return chunk_init();
}

void chunkqueue_chunk_pool_clear(void)
{
    for (chunk *next, *c = chunks; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks = NULL;
    for (chunk *next, *c = chunks_oversized; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks_oversized = NULL;
    chunks_oversized_n = 0;
    for (chunk *next, *c = chunks_filechunk; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks_filechunk = NULL;
}

void chunkqueue_chunk_pool_free(void)
{
    chunkqueue_chunk_pool_clear();
    for (chunk *next, *c = chunk_buffers; c; c = next) {
        next = c->next;
      #if 1 /*(chunk_buffers contains MEM_CHUNK with (c->mem == NULL))*/
        free(c);
      #else /*(c->mem = buffer_init() is no longer necessary below)*/
        c->mem = buffer_init(); /*(chunk_reset() expects c->mem != NULL)*/
        chunk_free(c);
      #endif
    }
    chunk_buffers = NULL;
}

__attribute_pure__
static off_t chunk_remaining_length(const chunk *c) {
    /* MEM_CHUNK or FILE_CHUNK */
    return (c->type == MEM_CHUNK
              ? (off_t)buffer_clen(c->mem)
              : c->file.length)
           - c->offset;
}

static void chunkqueue_release_chunks(chunkqueue *cq) {
    cq->last = NULL;
    for (chunk *c; (c = cq->first); ) {
        cq->first = c->next;
        chunk_release(c);
    }
}

void chunkqueue_free(chunkqueue *cq) {
    if (NULL == cq) return;
    chunkqueue_release_chunks(cq);
    free(cq);
}

static void chunkqueue_prepend_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
	if (NULL == (c->next = cq->first)) cq->last = c;
	cq->first = c;
}

static void chunkqueue_append_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
	c->next = NULL;
	*(cq->last ? &cq->last->next : &cq->first) = c;
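	/*(added note: the assignment above links c into cq->last->next when the
	 * queue is non-empty, else into cq->first, selecting the target link via
	 * a pointer to avoid duplicating the append logic in two branches)*/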
	cq->last = c;
}

__attribute_returns_nonnull__
static chunk * chunkqueue_prepend_mem_chunk(chunkqueue *cq, size_t sz) {
    chunk *c = chunk_acquire(sz);
    chunkqueue_prepend_chunk(cq, c);
    return c;
}

__attribute_returns_nonnull__
static chunk * chunkqueue_append_mem_chunk(chunkqueue *cq, size_t sz) {
    chunk *c = chunk_acquire(sz);
    chunkqueue_append_chunk(cq, c);
    return c;
}

__attribute_nonnull__()
__attribute_returns_nonnull__
static chunk * chunkqueue_append_file_chunk(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
    chunk * const c = chunk_acquire_filechunk();
    chunkqueue_append_chunk(cq, c);
    c->type = FILE_CHUNK;
    c->offset = offset;
    c->file.length = offset + len;
    cq->bytes_in += len;
    buffer_copy_buffer(c->mem, fn);
    return c;
}

void chunkqueue_reset(chunkqueue *cq) {
    chunkqueue_release_chunks(cq);
    cq->bytes_in = 0;
    cq->bytes_out = 0;
    cq->tempdir_idx = 0;
}

void chunkqueue_append_file_fd(chunkqueue * const restrict cq, const buffer * const restrict fn, int fd, off_t offset, off_t len) {
    if (len > 0) {
        (chunkqueue_append_file_chunk(cq, fn, offset, len))->file.fd = fd;
    }
    else {
        close(fd);
    }
}
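/* Usage sketch (illustrative, not in the original source): queue an
 * already-open fd; the queue takes ownership and close()s it when the chunk
 * is released:
 *   chunkqueue_append_file_fd(cq, fn, fd, 0, st.st_size);
 * (a len of 0 closes fd immediately rather than queueing an empty chunk) */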

void chunkqueue_append_file(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
    if (len > 0) {
        chunkqueue_append_file_chunk(cq, fn, offset, len);
    }
}

static int chunkqueue_append_mem_extend_chunk(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	chunk *c = cq->last;
	if (0 == len) return 1;
	if (c != NULL && c->type == MEM_CHUNK
	    && buffer_string_space(c->mem) >= len) {
		buffer_append_string_len(c->mem, mem, len);
		cq->bytes_in += len;
		return 1;
	}
	return 0;
}

void chunkqueue_append_buffer(chunkqueue * const restrict cq, buffer * const restrict mem) {
	chunk *c;
	const size_t len = buffer_clen(mem);
	if (len < 1024 && chunkqueue_append_mem_extend_chunk(cq, mem->ptr, len)) {
		buffer_clear(mem);
		return;
	}

	c = chunkqueue_append_mem_chunk(cq, chunk_buf_sz);
	cq->bytes_in += len;
	buffer_move(c->mem, mem);
}

void chunkqueue_append_mem(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	chunk *c;
	if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
		return;

	c = chunkqueue_append_mem_chunk(cq, len+1);
	cq->bytes_in += len;
	buffer_copy_string_len(c->mem, mem, len);
}
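/* Example (illustrative, not in the original source):
 * chunkqueue_append_mem(cq, CONST_STR_LEN("hello")) extends the last
 * MEM_CHUNK in place when it has room for the 5 bytes; otherwise it acquires
 * a pooled chunk and copies the string into it. */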


void chunkqueue_append_mem_min(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	chunk *c;
	if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
		return;

	c = chunk_init_sz(len+1);
	chunkqueue_append_chunk(cq, c);
	cq->bytes_in += len;
	buffer_copy_string_len(c->mem, mem, len);
}

void chunkqueue_append_chunkqueue(chunkqueue * const restrict cq, chunkqueue * const restrict src) {
	if (NULL == src->first) return;

	if (NULL == cq->first) {
		cq->first = src->first;
	} else {
		cq->last->next = src->first;
	}
	cq->last = src->last;
	cq->bytes_in += chunkqueue_length(src);

	src->first = NULL;
	src->last = NULL;
	src->bytes_out = src->bytes_in;
}

buffer * chunkqueue_prepend_buffer_open_sz(chunkqueue *cq, size_t sz) {
	chunk * const c = chunkqueue_prepend_mem_chunk(cq, sz);
	return c->mem;
}


buffer * chunkqueue_prepend_buffer_open(chunkqueue *cq) {
	return chunkqueue_prepend_buffer_open_sz(cq, chunk_buf_sz);
}


void chunkqueue_prepend_buffer_commit(chunkqueue *cq) {
	cq->bytes_in += buffer_clen(cq->first->mem);
}

buffer * chunkqueue_append_buffer_open_sz(chunkqueue *cq, size_t sz) {
	chunk * const c = chunkqueue_append_mem_chunk(cq, sz);
	return c->mem;
}


buffer * chunkqueue_append_buffer_open(chunkqueue *cq) {
	return chunkqueue_append_buffer_open_sz(cq, chunk_buf_sz);
}


void chunkqueue_append_buffer_commit(chunkqueue *cq) {
	cq->bytes_in += buffer_clen(cq->last->mem);
}

char * chunkqueue_get_memory(chunkqueue * const restrict cq, size_t * const restrict len) {
	size_t sz = *len ? *len : (chunk_buf_sz >> 1);
	buffer *b;
	chunk *c = cq->last;
	if (NULL != c && MEM_CHUNK == c->type) {
		/* return pointer into existing buffer if large enough */
		size_t avail = buffer_string_space(c->mem);
		if (avail >= sz) {
			*len = avail;
			b = c->mem;
			return b->ptr + buffer_clen(b);
		}
	}

	/* allocate new chunk */
	b = chunkqueue_append_buffer_open_sz(cq, sz);
	*len = buffer_string_space(b);
	return b->ptr;
}
void chunkqueue_use_memory(chunkqueue * const restrict cq, chunk *ckpt, size_t len) {
    buffer *b = cq->last->mem;

    if (__builtin_expect( (len > 0), 1)) {
        buffer_commit(b, len);
        cq->bytes_in += len;
        if (cq->last == ckpt || NULL == ckpt || MEM_CHUNK != ckpt->type
            || len > buffer_string_space(ckpt->mem)) return;

        buffer_append_string_buffer(ckpt->mem, b);
    }
    else if (!buffer_is_blank(b)) { /*(cq->last == ckpt)*/
        return; /* last chunk is not empty */
    }

    /* remove empty last chunk */
    chunk_release(cq->last);
    cq->last = ckpt;
    *(ckpt ? &ckpt->next : &cq->first) = NULL;
}
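/* Usage sketch (illustrative, not in the original source): pair
 * chunkqueue_get_memory() with chunkqueue_use_memory() to read from a socket
 * straight into the queue without an intermediate copy:
 *   chunk *ckpt = cq->last;
 *   size_t avail = 0;                  (0 requests the default size)
 *   char *ptr = chunkqueue_get_memory(cq, &avail);
 *   ssize_t rd = read(fd, ptr, avail);
 *   if (rd >= 0) chunkqueue_use_memory(cq, ckpt, (size_t)rd);
 */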

void chunkqueue_update_file(chunkqueue * const restrict cq, chunk *c, off_t len) {
    /*assert(c->type == FILE_CHUNK);*/
    c->file.length += len;
    cq->bytes_in += len;
    if (0 == chunk_remaining_length(c))
        chunkqueue_remove_empty_chunks(cq);
}

void chunkqueue_set_tempdirs_default (const array *tempdirs, off_t upload_temp_file_size) {
    if (upload_temp_file_size == 0)
        upload_temp_file_size = DEFAULT_TEMPFILE_SIZE;
    chunkqueue_default_tempdirs = tempdirs;
    chunkqueue_default_tempfile_size = upload_temp_file_size;
}

void chunkqueue_set_tempdirs(chunkqueue * const restrict cq, const array * const restrict tempdirs, off_t upload_temp_file_size) {
    if (upload_temp_file_size == 0)
        upload_temp_file_size = chunkqueue_default_tempfile_size;
    cq->tempdirs = tempdirs;
    cq->upload_temp_file_size = upload_temp_file_size;
    cq->tempdir_idx = 0;
}

__attribute_noinline__
static void chunkqueue_dup_file_chunk_fd (chunk * const restrict d, const chunk * const restrict c) {
    /*assert(d != c);*/
    /*assert(d->type == FILE_CHUNK);*/
    /*assert(c->type == FILE_CHUNK);*/
    if (c->file.fd >= 0) {
        if (c->file.refchg) {
            d->file.fd = c->file.fd;
            d->file.ref = c->file.ref;
            d->file.refchg = c->file.refchg;
            d->file.refchg(d->file.ref, 1);
        }
        else
            d->file.fd = fdevent_dup_cloexec(c->file.fd);
      #ifdef HAVE_MMAP
        if ((d->file.view = c->file.view))
            ++d->file.view->refcnt;
      #endif
    }
}

__attribute_noinline__
static void chunkqueue_steal_partial_file_chunk(chunkqueue * const restrict dest, const chunk * const restrict c, const off_t len) {
    chunkqueue_append_file(dest, c->mem, c->offset, len);
    chunkqueue_dup_file_chunk_fd(dest->last, c);
}

void chunkqueue_steal(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len) {
	/*(0-length first chunk (unexpected) is removed from src even if len == 0;
	 * progress is made when caller loops on this func)*/
	off_t clen;
	do {
		chunk * const c = src->first;
		if (__builtin_expect( (NULL == c), 0)) break;

		clen = chunk_remaining_length(c);

		if (len >= clen) {
			/* move complete chunk */
			src->first = c->next;
			if (c == src->last) src->last = NULL;

			if (__builtin_expect( (0 != clen), 1)) {
				chunkqueue_append_chunk(dest, c);
				dest->bytes_in += clen;
			}
			else /* drop empty chunk */
				chunk_release(c);
		} else {
			/* copy partial chunk */

			switch (c->type) {
			case MEM_CHUNK:
				chunkqueue_append_mem(dest, c->mem->ptr + c->offset, len);
				break;
			case FILE_CHUNK:
				/* tempfile flag is in "last" chunk after the split */
				chunkqueue_steal_partial_file_chunk(dest, c, len);
				break;
			}

			c->offset += len;
			clen = len;
		}

		src->bytes_out += clen;
	} while ((len -= clen));
}
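/* Worked example (illustrative, not in the original source): if src holds an
 * 8k MEM_CHUNK followed by a 100k FILE_CHUNK, chunkqueue_steal(dest, src,
 * 16384) moves the MEM_CHUNK whole, then splits the FILE_CHUNK: an 8k file
 * range is appended to dest (sharing the fd via
 * chunkqueue_dup_file_chunk_fd()) and src->first->offset advances by 8k. */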

static int chunkqueue_get_append_mkstemp(buffer * const b, const char *path, const uint32_t len) {
    buffer_copy_path_len2(b,path,len,CONST_STR_LEN("lighttpd-upload-XXXXXX"));
  #if defined(HAVE_SPLICE) && defined(HAVE_PWRITE)
    /*(splice() rejects O_APPEND target; omit flag if also using pwrite())*/
    return fdevent_mkostemp(b->ptr, 0);
  #else
    return fdevent_mkostemp(b->ptr, O_APPEND);
  #endif
}

static chunk *chunkqueue_get_append_newtempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    static const buffer emptyb = { "", 0, 0 };
    chunk * const restrict last = cq->last;
    chunk * const restrict c = chunkqueue_append_file_chunk(cq, &emptyb, 0, 0);
    buffer * const restrict template = c->mem;
    c->file.is_temp = 1;

    if (cq->tempdirs && cq->tempdirs->used) {
        /* we have several tempdirs; jump out only if all of them fail */
        for (errno = EIO; cq->tempdir_idx < cq->tempdirs->used; ++cq->tempdir_idx) {
            data_string *ds = (data_string *)cq->tempdirs->data[cq->tempdir_idx];
            c->file.fd =
              chunkqueue_get_append_mkstemp(template, BUF_PTR_LEN(&ds->value));
            if (-1 != c->file.fd) return c;
        }
    }
    else {
        c->file.fd =
          chunkqueue_get_append_mkstemp(template, CONST_STR_LEN("/var/tmp"));
        if (-1 != c->file.fd) return c;
    }

    /* (report only last error from mkstemp() even if multiple temp dirs tried) */
    log_perror(errh, __FILE__, __LINE__,
      "opening temp-file failed: %s", template->ptr);
    /* remove (failed) final chunk */
    c->file.is_temp = 0;
    if ((cq->last = last))
        last->next = NULL;
    else
        cq->first = NULL;
    chunk_release(c);
    return NULL;
}

static chunk *chunkqueue_get_append_tempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    /*
     * if the last chunk is
     * - smaller than cq->upload_temp_file_size
     * -> append to it (and it then might exceed cq->upload_temp_file_size)
     * otherwise
     * -> create a new chunk
     */

    chunk * const c = cq->last;
    if (NULL != c && c->file.is_temp && c->file.fd >= 0) {

        if (c->file.length < (off_t)cq->upload_temp_file_size)
            return c; /* ok, take the last chunk for our job */

        /* the chunk is too large now, close it */
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1;
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            return NULL;
        }
    }
    return chunkqueue_get_append_newtempfile(cq, errh);
}

__attribute_cold__
static int chunkqueue_append_tempfile_err(chunkqueue * const cq, log_error_st * const restrict errh, chunk * const c) {
    const int errnum = errno;
    if (errnum == EINTR) return 1; /* retry */

    int retry = (errnum == ENOSPC && cq->tempdirs
                 && ++cq->tempdir_idx < cq->tempdirs->used);
    if (!retry)
        log_perror(errh, __FILE__, __LINE__,
          "write() temp-file %s failed", c->mem->ptr);

    if (0 == chunk_remaining_length(c)) {
        /*(remove empty chunk and unlink tempfile)*/
        chunkqueue_remove_empty_chunks(cq);
    }
    else {/*(close tempfile; avoid later attempts to append)*/
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1;
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            retry = 0;
        }
    }
    return retry;
}

__attribute_cold__
__attribute_noinline__
static int chunkqueue_to_tempfiles(chunkqueue * const restrict dest, log_error_st * const restrict errh) {
    /* transfer chunks from dest to src, adjust dest->bytes_in, and then call
     * chunkqueue_steal_with_tempfiles() to write chunks from src back into
     * dest, but into tempfiles.  chunkqueue_steal_with_tempfiles() calls back
     * into chunkqueue_append_mem_to_tempfile(), but will not re-enter this func
     * since chunks moved to src, and dest made empty before recursive call */
    const off_t cqlen = chunkqueue_length(dest);
    chunkqueue src = *dest; /*(copy struct)*/
    dest->first = dest->last = NULL;
    dest->bytes_in -= cqlen;
    if (0 == chunkqueue_steal_with_tempfiles(dest, &src, cqlen, errh))
        return 0;
    else {
        const int errnum = errno;
        chunkqueue_release_chunks(&src);
        return -errnum;
    }
}

int chunkqueue_append_mem_to_tempfile(chunkqueue * const restrict dest, const char * restrict mem, size_t len, log_error_st * const restrict errh) {
	chunk *dst_c = dest->first;

	/* check if prior MEM_CHUNK(s) exist and write to tempfile
	 * (check first chunk only, since if we are using tempfiles, then
	 *  we expect further chunks to be tempfiles after starting tempfiles)*/
	if (dst_c && dst_c->type == MEM_CHUNK
	    && 0 != chunkqueue_to_tempfiles(dest, errh)) {
		return -1;
	}

	do {
		/*(aside: arg len is permitted to be 0 and creates tempfile as a
		 * side effect.  This is used by mod_ssi for ssi exec, as the func
		 * chunkqueue_get_append_tempfile() is not public.  The result is
		 * an empty chunk at the end of the chunkqueue, which typically
		 * should be avoided)*/
		dst_c = chunkqueue_get_append_tempfile(dest, errh);
		if (NULL == dst_c)
			return -1;
	      #ifdef __COVERITY__
		if (dst_c->file.fd < 0) return -1;
	      #endif
	      #ifdef HAVE_PWRITE
		/* coverity[negative_returns : FALSE] */
		const ssize_t written = pwrite(dst_c->file.fd, mem, len, dst_c->file.length);
	      #else
		/* coverity[negative_returns : FALSE] */
		const ssize_t written = write(dst_c->file.fd, mem, len);
	      #endif

		if ((size_t) written == len) {
			dst_c->file.length += len;
			dest->bytes_in += len;
			return 0;
		} else if (written >= 0) {
			/*(assume EINTR if partial write and retry write();
			 * retry write() might fail with ENOSPC if no more space on volume)*/
			dest->bytes_in += written;
			mem += written;
			len -= (size_t)written;
			dst_c->file.length += (size_t)written;
			/* continue; retry */
		} else if (!chunkqueue_append_tempfile_err(dest, errh, dst_c)) {
			break; /* return -1; */
		} /* else continue; retry */
	} while (len);

	return -1;
}
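/* Usage sketch (illustrative, not in the original source): spill a large
 * in-memory buffer to a tempfile-backed chunk instead of keeping it in RAM:
 *   if (0 != chunkqueue_append_mem_to_tempfile(cq, b->ptr, buffer_clen(b), errh))
 *       (handle error: tempfile creation or write failed)
 */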

#ifdef HAVE_PWRITEV

#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif

__attribute_cold__
__attribute_noinline__
static ssize_t chunkqueue_append_cqmem_to_tempfile_partial(chunkqueue * const dest, chunk * const c, ssize_t wr, log_error_st * const restrict errh) {
    /* recover from partial write of existing dest MEM_CHUNK to tempfile */
    chunk *ckpt = dest->first;
    while (ckpt->next != c) ckpt = ckpt->next;
    ckpt->next = NULL;
    dest->last = ckpt;
    dest->bytes_in  -= wr; /*(avoid double count in dest cq)*/
    dest->bytes_out -= wr;
    chunkqueue_mark_written(dest, wr);/*(remove MEM_CHUNK written to tempfile)*/

    c->next = dest->first; /*(place tempfile at beginning of dest cq)*/
    dest->first = c;
    return (0 == chunkqueue_to_tempfiles(dest, errh)) ? 0 : -1;
}

static ssize_t chunkqueue_append_cqmem_to_tempfile(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
    /* write multiple MEM_CHUNKs to tempfile in single pwritev() syscall */
    /*(could lseek() and writev() if pwritev() is not available,
     * but if writev() is available, pwritev() is likely available,
     * e.g. any modern Linux or *BSD, and possibly anything not Windows)*/
    unsigned int iovcnt = 0;
    struct iovec iov[16];

    off_t dlen = 0;
    chunk *c;
    for (c = dest->first; c && c->type == MEM_CHUNK; c = c->next) {
        const off_t clen = chunk_remaining_length(c);
        iov[iovcnt].iov_base = c->mem->ptr + c->offset;
        iov[iovcnt].iov_len  = (size_t)clen;
        dlen += clen;
        ++iovcnt;
        if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
            break; /*(not expecting large number of MEM_CHUNK)*/
    }
    if (__builtin_expect( (c != NULL), 0) && dest->first->type == MEM_CHUNK) {
        /*(expecting only MEM_CHUNK if dest cq starts w/ MEM_CHUNK)*/
        /*(use less efficient fallback if that assumption does not hold true)*/
        if (0 != chunkqueue_to_tempfiles(dest, errh))
            return -1;
        dlen = 0;
        iovcnt = 0;
    }

    if (__builtin_expect( (iovcnt < sizeof(iov)/sizeof(*iov)), 1)) {
        for (c = src->first; c && c->type == MEM_CHUNK; c = c->next) {
            off_t clen = chunk_remaining_length(c);
            if (clen > len) clen = len;
            iov[iovcnt].iov_base = c->mem->ptr + c->offset;
            iov[iovcnt].iov_len  = (size_t)clen;
            len -= clen;
            ++iovcnt;
            if (0 == len) break;
            if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
                break; /*(not expecting large number of MEM_CHUNK)*/
        }
    }

    if (__builtin_expect( (0 == iovcnt), 0)) return 0; /*(should not happen)*/

    c = chunkqueue_get_append_tempfile(dest, errh);
    if (NULL == c)
        return -1;
  #ifdef __COVERITY__
    if (c->file.fd < 0) return -1;
  #endif
    /* coverity[negative_returns : FALSE] */
    ssize_t wr = pwritev(c->file.fd, iov, (int)iovcnt, c->file.length);

    /*(memory use in chunkqueues is expected to be limited before spilling
     * to tempfiles, so common case will write entire iovec to tempfile,
     * and we return amount written *from src cq*, even if partial write;
     * (not looping here to retry writing more, but caller might loop))*/

    if (wr >= 0) {
        c->file.length += wr;
        dest->bytes_in += wr;
        if (dlen) {
            if (__builtin_expect( (wr < dlen), 0))
                return
                  chunkqueue_append_cqmem_to_tempfile_partial(dest,c,wr,errh);
            wr -= (ssize_t)dlen;
            dest->bytes_in  -= dlen; /*(avoid double count in dest cq)*/
            dest->bytes_out -= dlen;
            chunkqueue_mark_written(dest, dlen);
        }
    }
    else if (chunkqueue_append_tempfile_err(dest, errh, c))
        wr = 0; /*(to trigger continue/retry in caller rather than error)*/

    return wr;
}

#endif /* HAVE_PWRITEV */

#ifdef HAVE_SPLICE

__attribute_cold__
__attribute_noinline__
static ssize_t chunkqueue_append_drain_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* attempt to drain full 'len' from pipe
     * (even if len not reduced to opts->max_per_read limit)
     * since data may have already been moved from socket to pipe
     *(returns 0 on success, or -errno (negative errno) if error,
     * even if partial write occurred)*/
    char buf[16384];
    ssize_t rd;
    do {
        do {
            rd = read(fd, buf, sizeof(buf));
        } while (rd < 0 && errno == EINTR);
        if (rd < 0) break;
        if (0 != chunkqueue_append_mem_to_tempfile(cq, buf, (size_t)rd, errh))
            break;
    } while ((len -= (unsigned int)rd));

    if (0 == len)
        return 0;
    else {
        const int errnum = errno;
        if (cq->last && 0 == chunk_remaining_length(cq->last)) {
            /*(remove empty chunk and unlink tempfile)*/
            chunkqueue_remove_empty_chunks(cq);
        }
        return -errnum;
    }
}

ssize_t chunkqueue_append_splice_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* check if prior MEM_CHUNK(s) exist and write to tempfile
     * (check first chunk only, since if we are using tempfiles, then
     *  we expect further chunks to be tempfiles after starting tempfiles)*/
    if (cq->first && cq->first->type == MEM_CHUNK) {
        int rc = chunkqueue_to_tempfiles(cq, errh);
        if (__builtin_expect( (0 != rc), 0)) return rc;
    }

    /*(returns num bytes written, or -errno (negative errno) if error)*/
    ssize_t total = 0;
    do {
        chunk * const c = chunkqueue_get_append_tempfile(cq, errh);
        if (__builtin_expect( (NULL == c), 0)) return -errno;

        loff_t off = c->file.length;
        ssize_t wr = splice(fd, NULL, c->file.fd, &off, len,
                            SPLICE_F_MOVE | SPLICE_F_NONBLOCK);

        if (__builtin_expect(((size_t)wr == len), 1)) {
            c->file.length += len;
            cq->bytes_in += len;
            return total + len;
        }
        else if (wr >= 0) {
            /*(assume EINTR if partial write and retry;
             * retry might fail with ENOSPC if no more space on volume)*/
            cq->bytes_in += wr;
            total += wr;
            len -= (size_t)wr;
            c->file.length += (size_t)wr;
            /* continue; retry */
        }
        else {
            const int errnum = errno;
            switch (errnum) {
              case EAGAIN:
             #ifdef EWOULDBLOCK
             #if EWOULDBLOCK != EAGAIN
              case EWOULDBLOCK:
             #endif
             #endif
                if (0 == chunk_remaining_length(c)) {
                    /*(remove empty chunk and unlink tempfile)*/
                    chunkqueue_remove_empty_chunks(cq);
                }
                return total;
              case EINVAL: /*(assume total == 0 if EINVAL)*/
                wr = chunkqueue_append_drain_pipe_tempfile(cq, fd, len, errh);
                return (0 == wr) ? total + (ssize_t)len : wr;
              default:
                if (!chunkqueue_append_tempfile_err(cq, errh, c))
                    return -errnum;
                break; /* else continue; retry */
            }
        }
    } while (len);
    return -EIO; /*(not reached)*/
}

static int cqpipes[2] = { -1, -1 };

__attribute_cold__
__attribute_noinline__
void chunkqueue_internal_pipes(int init) {
    /*(intended for internal use within a single lighttpd process;
     * must be initialized after fork() and graceful-restart to avoid
     * sharing pipes between processes)*/
    if (-1 != cqpipes[0]) { close(cqpipes[0]); cqpipes[0] = -1; }
    if (-1 != cqpipes[1]) { close(cqpipes[1]); cqpipes[1] = -1; }
    if (init)
        if (0 != fdevent_pipe_cloexec(cqpipes, 262144)) { } /*(ignore error)*/
}

__attribute_cold__
__attribute_noinline__
static void chunkqueue_pipe_read_discard (void) {
    char buf[16384];
    ssize_t rd;
    do {
        rd = read(cqpipes[0], buf, sizeof(buf));
    } while (rd > 0 || (rd < 0 && errno == EINTR));
    if (rd < 0
      #ifdef EWOULDBLOCK
      #if EWOULDBLOCK != EAGAIN
        && errno != EWOULDBLOCK
      #endif
      #endif
        && errno != EAGAIN) {
        chunkqueue_internal_pipes(1); /*(close() and re-initialize)*/
    }
}

ssize_t chunkqueue_append_splice_sock_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /*(returns num bytes written, or -errno (negative errno) if error)*/
    int * const pipes = cqpipes;
    if (-1 == pipes[1])
        return -EINVAL; /*(not configured; not handled here)*/

    /* splice() socket data to intermediate pipe */
    ssize_t wr = splice(fd, NULL, pipes[1], NULL, len,
                        SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
    if (__builtin_expect( (wr <= 0), 0))
        return -EINVAL; /*(reuse to indicate not handled here)*/
    len = (unsigned int)wr;

    /* splice() data from intermediate pipe to tempfile */
    wr = chunkqueue_append_splice_pipe_tempfile(cq, pipes[0], len, errh);
    if (wr < 0) /* expect (wr == (ssize_t)len) or (wr == -1) */
        chunkqueue_pipe_read_discard();/* discard data from intermediate pipe */
    return wr;
}

#endif /* HAVE_SPLICE */

int chunkqueue_steal_with_tempfiles(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
	/*(0-length first chunk (unexpected) is removed from src even if len == 0;
	 * progress is made when caller loops on this func)*/
	off_t clen;
	do {
		chunk * const c = src->first;
		if (__builtin_expect( (NULL == c), 0)) break;

	  #ifdef HAVE_PWRITEV

		if (c->type == MEM_CHUNK) {
			clen = chunkqueue_append_cqmem_to_tempfile(dest, src, len, errh);
			if (__builtin_expect( (clen < 0), 0)) return -1;
			chunkqueue_mark_written(src, clen);
		}
		else { /* (c->type == FILE_CHUNK) */
			clen = chunk_remaining_length(c);
			if (len < clen) clen = len;
			chunkqueue_steal(dest, src, clen);
		}

	  #else

		clen = chunk_remaining_length(c);
		if (len < clen) clen = len;

		switch (c->type) {
		case FILE_CHUNK:
			chunkqueue_steal(dest, src, clen);
			break;

		case MEM_CHUNK:
			/* store bytes from memory chunk in tempfile */
			if (clen
			    && 0 != chunkqueue_append_mem_to_tempfile(dest,
			                                              c->mem->ptr+c->offset,
			                                              clen, errh))
				return -1;
			chunkqueue_mark_written(src, clen);
			break;
		}

	  #endif
	} while ((len -= clen));

	return 0;
}

void chunkqueue_append_cq_range (chunkqueue * const dst, const chunkqueue * const src, off_t offset, off_t len) {
    /* similar to chunkqueue_steal() but copy and append src range to dst cq */
    /* (dst cq and src cq can be the same cq, so neither is marked restrict) */

    /* copy and append range len from src to dst */
    for (const chunk *c = src->first; len > 0 && c != NULL; c = c->next) {
        /* scan into src to range offset (also skips empty chunks) */
        off_t clen = chunk_remaining_length(c);
        if (offset >= clen) {
            offset -= clen;
            continue;
        }
        clen -= offset;
        if (len < clen) clen = len;
        len -= clen;

        if (c->type == FILE_CHUNK) {
            chunkqueue_append_file(dst, c->mem, c->offset + offset, clen);
            chunkqueue_dup_file_chunk_fd(dst->last, c);
        }
        else { /*(c->type == MEM_CHUNK)*/
            /*(string refs would reduce copying,
             * but this path is not expected to be hot)*/
            chunkqueue_append_mem(dst, c->mem->ptr + c->offset + offset, clen);
        }
        offset = 0;
    }
}
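/* Example (illustrative, not in the original source):
 * chunkqueue_append_cq_range(dst, src, 100, 50) copies bytes [100,150) of the
 * unread data in src onto the end of dst, duplicating fd references for
 * FILE_CHUNKs and copying memory for MEM_CHUNKs; src is left unmodified. */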

void chunkqueue_mark_written(chunkqueue *cq, off_t len) {
    cq->bytes_out += len;

    for (chunk *c = cq->first; c; ) {
        off_t c_len = chunk_remaining_length(c);
        if (len >= c_len) { /* chunk got finished */
            chunk * const x = c;
            c = c->next;
            len -= c_len;
            chunk_release(x);
        }
        else { /* partial chunk */
            c->offset += len;
            cq->first = c;
            return; /* chunk not finished */
        }
    }
    cq->first = cq->last = NULL;
}
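/* Example (illustrative, not in the original source): if cq->first has 10
 * bytes remaining, chunkqueue_mark_written(cq, 4) adds 4 to cq->bytes_out and
 * advances the chunk's offset by 4; a later chunkqueue_mark_written(cq, 6)
 * finishes the chunk and releases it back to the chunk pool. */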

void chunkqueue_remove_finished_chunks(chunkqueue *cq) {
    for (chunk *c; (c = cq->first) && 0 == chunk_remaining_length(c); ) {
        if (NULL == (cq->first = c->next)) cq->last = NULL;
        chunk_release(c);
    }
}

void chunkqueue_remove_empty_chunks(chunkqueue *cq) {
	chunk *c;
	chunkqueue_remove_finished_chunks(cq);

	for (c = cq->first; c && c->next; c = c->next) {
		if (0 == chunk_remaining_length(c->next)) {
			chunk *empty = c->next;
			c->next = empty->next;
			if (empty == cq->last) cq->last = c;
			chunk_release(empty);
		}
	}
}

void chunkqueue_compact_mem_offset(chunkqueue * const cq) {
    chunk * const restrict c = cq->first;
    if (0 == c->offset) return;
    if (c->type != MEM_CHUNK) return; /*(should not happen)*/

    buffer * const restrict b = c->mem;
    size_t len = buffer_clen(b) - c->offset;
    memmove(b->ptr, b->ptr+c->offset, len);
    c->offset = 0;
    buffer_truncate(b, len);
}

void chunkqueue_compact_mem(chunkqueue *cq, size_t clen) {
    /* caller must guarantee that chunks in chunkqueue are MEM_CHUNK,
     * which is currently always true when reading input from client */
    chunk *c = cq->first;
    buffer *b = c->mem;
    size_t len = buffer_clen(b) - c->offset;
    if (len >= clen) return;
    if (b->size > clen) {
        if (buffer_string_space(b) < clen - len)
            chunkqueue_compact_mem_offset(cq);
    }
    else {
        b = chunkqueue_prepend_buffer_open_sz(cq, clen+1);
        buffer_append_string_len(b, c->mem->ptr + c->offset, len);
        cq->first->next = c->next;
        if (NULL == c->next) cq->last = cq->first;
        chunk_release(c);
        c = cq->first;
    }

    for (chunk *fc = c; ((clen -= len) && (c = fc->next)); ) {
        len = buffer_clen(c->mem) - c->offset;
        if (len > clen) {
            buffer_append_string_len(b, c->mem->ptr + c->offset, clen);
            c->offset += clen;
            break;
        }
        buffer_append_string_len(b, c->mem->ptr + c->offset, len);
        fc->next = c->next;
        if (NULL == c->next) cq->last = fc;
        chunk_release(c);
    }
    /* chunkqueue_prepend_buffer_commit() is not called here;
     * no data added/removed from chunkqueue; consolidated only */
}

static int chunk_open_file_chunk(chunk * const restrict c, log_error_st * const restrict errh) {
	if (-1 == c->file.fd) {
		/* (permit symlinks; should already have been checked.  However, TOC-TOU remains) */
		if (-1 == (c->file.fd = fdevent_open_cloexec(c->mem->ptr, 1, O_RDONLY, 0))) {
			log_perror(errh, __FILE__, __LINE__, "open failed: %s",c->mem->ptr);
			return -1;
		}
	}

	/*(skip file size checks if file is temp file created by lighttpd)*/
	if (c->file.is_temp) return 0;

	struct stat st;
	if (-1 == fstat(c->file.fd, &st)) {
		log_perror(errh, __FILE__, __LINE__, "fstat failed");
		return -1;
	}

	/*(ok if file grew, e.g. a log file)*/
	if (c->file.length > st.st_size) {
		log_error(errh, __FILE__, __LINE__, "file shrunk: %s", c->mem->ptr);
		return -1;
	}

	return 0;
}

int chunkqueue_open_file_chunk(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    return chunk_open_file_chunk(cq->first, errh);
}

static ssize_t
chunkqueue_write_data (const int fd, const void *buf, size_t len)
{
    ssize_t wr = 0;
    if (len)
        do { wr = write(fd, buf, len); } while (-1 == wr && errno == EINTR);
    return wr;
}

#ifdef HAVE_MMAP
__attribute_cold__
#endif
__attribute_noinline__
static ssize_t
chunkqueue_write_chunk_file_intermed (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    char buf[16384];
    char *data = buf;
    const off_t len = c->file.length - c->offset;
    uint32_t dlen = len < (off_t)sizeof(buf) ? (uint32_t)len : sizeof(buf);
    chunkqueue cq = {c,c,0,0,0,0,0}; /*(fake cq for chunkqueue_peek_data())*/
    if (0 != chunkqueue_peek_data(&cq, &data, &dlen, errh) && 0 == dlen)
        return -1;
    return chunkqueue_write_data(fd, data, dlen);
}

#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
 && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
 && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
#include <sys/sendfile.h>
#include <stdint.h>
#endif
static ssize_t
chunkqueue_write_chunk_file (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    /*(similar to network_write_file_chunk_mmap(), but does not use send() on
     * Windows because fd is expected to be file or pipe here, not socket)*/

    if (0 != chunk_open_file_chunk(c, errh))
        return -1;

    const off_t len = c->file.length - c->offset;
    if (0 == len) return 0; /*(sanity check)*/

  #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
   && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
   && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
    /* Linux kernel >= 2.6.33 supports sendfile() between most fd types */
    off_t offset = c->offset;
    const ssize_t wr =
      sendfile(fd, c->file.fd, &offset, len < INT32_MAX ? len : INT32_MAX);
    if (__builtin_expect( (wr >= 0), 1) || (errno != EINVAL && errno != ENOSYS))
        return wr;
    /*(could fallback to mmap, but if sendfile fails on linux, mmap may, too)*/
  #elif defined(HAVE_MMAP)
    /*(chunkqueue_write_chunk() caller must protect against SIGBUS, if needed)*/
    const chunk_file_view * const restrict cfv =
      chunkqueue_chunk_file_view(c, len, errh);
    if (NULL != cfv) {
        const off_t mmap_avail = chunk_file_view_dlen(cfv, c->offset);
        return chunkqueue_write_data(fd, chunk_file_view_dptr(cfv, c->offset),
                                     len <= mmap_avail ? len : mmap_avail);
    }
  #endif

    return chunkqueue_write_chunk_file_intermed(fd, c, errh);
}

static ssize_t
chunkqueue_write_chunk_mem (const int fd, const chunk * const restrict c)
{
    const void * const buf = c->mem->ptr + c->offset;
    const size_t len = buffer_clen(c->mem) - (size_t)c->offset;
    return chunkqueue_write_data(fd, buf, len);
}

ssize_t
chunkqueue_write_chunk (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
{
    /*(note: expects non-empty cq->first)*/
    chunk * const c = cq->first;
    switch (c->type) {
      case MEM_CHUNK:
        return chunkqueue_write_chunk_mem(fd, c);
      case FILE_CHUNK:
        return chunkqueue_write_chunk_file(fd, c, errh);
      default:
        errno = EINVAL;
        return -1;
    }
}

1463 ssize_t
chunkqueue_write_chunk_to_pipe(const int fd,chunkqueue * const restrict cq,log_error_st * const restrict errh)1464 chunkqueue_write_chunk_to_pipe (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
1465 {
1466   #ifdef HAVE_SPLICE /* splice() temp files to pipe on Linux */
1467     chunk * const c = cq->first;
1468     if (c->type == FILE_CHUNK) {
1469         const size_t len = (size_t)(c->file.length - c->offset);
1470         loff_t abs_offset = c->offset;
1471         if (__builtin_expect( (0 == len), 0)) return 0;
1472         return (0 == chunk_open_file_chunk(c, errh))
1473           ? splice(c->file.fd, &abs_offset, fd, NULL, len, SPLICE_F_NONBLOCK)
1474           : -1;
1475     }
1476   #endif
1477     return chunkqueue_write_chunk(fd, cq, errh);
1478 }
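/* note: splice() above advances a local copy of the file offset, not
 * c->offset, so (as with chunkqueue_write_chunk()) the caller is expected to
 * account for any bytes moved via chunkqueue_mark_written() */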


void
chunkqueue_small_resp_optim (chunkqueue * const restrict cq)
{
    /*(caller must verify response is small (and non-empty) before calling)*/
    /*(caller must verify first chunk is MEM_CHUNK, i.e. response headers)*/
    /*(caller must verify response is non-zero length)*/

    /*(optimization to use fewer syscalls to send a small response by reading
     * small files into memory, thereby avoiding use of sendfile() and multiple
     * calls to writev()  (of benefit for cleartext (non-TLS) and <= HTTP/1.1))
     *(if TLS, the data will shortly need to be in memory for encryption anyway)*/

    /*assert(cq->first);*/
    /*assert(cq->first->type == MEM_CHUNK);*/
    /*assert(cq->first->next);*/
    chunk * restrict c = cq->first;
    chunk * const restrict filec = c->next;  /*(require file already be open)*/
    if (filec != cq->last || filec->type != FILE_CHUNK || filec->file.fd < 0)
        return;

    /* Note: there should be no size change in chunkqueue,
     * so cq->bytes_in and cq->bytes_out should not be modified */

    off_t len = filec->file.length - filec->offset;
    if ((size_t)len > buffer_string_space(c->mem)) {
        c->next = chunk_acquire((size_t)len+1);
        c = c->next;
        /*c->next = filec;*/
    }
    /* detach filec from chunkqueue; file expected to be read fully */
    c->next = NULL;
    cq->last = c;

    ssize_t rd;
    off_t offset = 0;
    char * const ptr = buffer_extend(c->mem, len);
    do {
        rd = chunk_file_pread(filec->file.fd, ptr+offset, (size_t)len,
                              filec->offset+offset);
    } while (rd > 0 && (offset += rd, len -= rd));
    /*(contents of chunkqueue kept valid even if error reading from file)*/
    if (__builtin_expect( (0 == len), 1))
        chunk_release(filec);
    else { /*(unexpected; error recovery)*/
        buffer_truncate(c->mem, (uint32_t)(ptr + offset - c->mem->ptr));
        cq->last = c->next = filec;
        if (offset)
            filec->offset += offset;
        else if (__builtin_expect( (cq->first != c), 0)) {
            cq->first->next = filec;
            chunk_release(c);
        }
    }
}
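#if 0
/* illustrative sketch (hypothetical caller; not part of this file): guards
 * mirroring the documented preconditions; the size threshold is an arbitrary
 * example value */
if (cq->first && cq->first->type == MEM_CHUNK && NULL != cq->first->next
    && chunkqueue_length(cq) <= 16384)
    chunkqueue_small_resp_optim(cq);
#endif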


#if 0
#ifdef HAVE_MMAP
__attribute_noinline__
static off_t
chunk_setjmp_memcpy_cb (void *dst, const void *src, off_t len)
{
    /*(on 32-bit systems, caller should assert len <= SIZE_MAX)*/
    memcpy(dst, src, (size_t)len);
    return len;
}
#endif
#endif
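/* note: chunk_setjmp_memcpy_cb() is referenced only by the disabled (#if 0)
 * mmap path in chunkqueue_peek_data() below; running the memcpy() under
 * sys_setjmp_eval3() confines a potential SIGBUS (e.g. if the mmap'd file is
 * truncated by another process) to the copy instead of crashing the server */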


int
chunkqueue_peek_data (chunkqueue * const cq,
                      char ** const data, uint32_t * const dlen,
                      log_error_st * const errh)
{
    char * const data_in = *data;
    const uint32_t data_insz = *dlen;
    *dlen = 0;

    for (chunk *c = cq->first; c; ) {
        uint32_t space = data_insz - *dlen;
        switch (c->type) {
          case MEM_CHUNK:
            {
                uint32_t have = buffer_clen(c->mem) - (uint32_t)c->offset;
                if (have > space)
                    have = space;
                if (__builtin_expect( (0 == have), 0))
                    break;
                if (*dlen)
                    memcpy(data_in + *dlen, c->mem->ptr + c->offset, have);
                else
                    *data = c->mem->ptr + c->offset; /*(reference; defer copy)*/
                *dlen += have;
                break;
            }

          case FILE_CHUNK:
            if (c->file.fd >= 0 || 0 == chunk_open_file_chunk(c, errh)) {
                off_t len = c->file.length - c->offset;
                if (len > (off_t)space)
                    len = (off_t)space;
                if (__builtin_expect( (0 == len), 0))
                    break;

            #if 0 /* XXX: might improve performance on some system workloads */
              #ifdef HAVE_MMAP
                /* mmap file to access data
                 * fd need not be kept open once the mmap has been created,
                 * but is currently kept open for other pre-existing logic
                 * which checks fd and opens the file, such as the condition
                 * for entering this code block above. */
                /* Note: current use is with somewhat large buffers, e.g. 128k.
                 * If larger buffers are used, then an upper limit, e.g. 512k,
                 * should be set on 32-bit to avoid address space issues. */
                /* Note: under heavy load (or in a microbenchmark),
                 * system-reported RSS can appear very large due to the many
                 * read-only memory maps of temp files.  Run pmap -X and
                 * exclude lighttpd mmap files to get a better view of
                 * actual memory use. */
                const chunk_file_view * const restrict cfv = (!c->file.is_temp)
                  ? chunkqueue_chunk_file_view(c, len, errh)
                  : NULL;
                if (cfv && chunk_file_view_dlen(cfv, c->offset) >= len) {
                    /*(check (above) that mapped chunk length >= requested len)*/
                    char * const mdata = chunk_file_view_dptr(cfv, c->offset);
                    if (!c->file.is_temp) {/*(might be changed to policy flag)*/
                        if (sys_setjmp_eval3(chunk_setjmp_memcpy_cb,
                                             data_in+*dlen, mdata, len) < 0) {
                            log_error(errh, __FILE__, __LINE__,
                              "SIGBUS in mmap: %s %d", c->mem->ptr, c->file.fd);
                            return -1;
                        }
                    }
                    else if (*dlen)
                        memcpy(data_in+*dlen, mdata, (size_t)len);
                    else
                        *data = mdata;
                    *dlen += (uint32_t)len;
                    break;
                }
              #endif
            #endif

                ssize_t rd = chunk_file_pread(c->file.fd, data_in+*dlen,
                                              (size_t)len, c->offset);
                if (rd <= 0) { /* -1 error; 0 EOF (unexpected) */
                    log_perror(errh, __FILE__, __LINE__, "read(\"%s\")",
                               c->mem->ptr);
                    return -1;
                }

                *dlen += (uint32_t)rd;
                break;
            }
            return -1;

          default:
            return -1;
        }

        if (*dlen == data_insz)
            break;

        c = c->next;
        if (NULL == c)
            break;

        if (*dlen && *data != data_in) {
            memcpy(data_in, *data, *dlen);
            *data = data_in;
        }
    }

    return 0;
}
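#if 0
/* illustrative sketch (hypothetical caller; not part of this file): peek at
 * up to sizeof(buf) bytes without consuming them.  On return, *data may
 * reference chunk memory directly (copy deferred for a single chunk) rather
 * than point into buf, and *dlen holds the number of bytes available */
char buf[8192];
char *data = buf;
uint32_t dlen = (uint32_t)sizeof(buf);
if (0 == chunkqueue_peek_data(cq, &data, &dlen, errh)) {
    /* ... parse data[0..dlen-1] ... */
    chunkqueue_mark_written(cq, consumed_bytes); /*(hypothetical count of
                                                  * bytes actually consumed)*/
}
#endif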


int
chunkqueue_read_data (chunkqueue * const cq,
                      char * const data, const uint32_t dlen,
                      log_error_st * const errh)
{
    char *ptr = data;
    uint32_t len = dlen;
    if (chunkqueue_peek_data(cq, &ptr, &len, errh) < 0 || len != dlen)
        return -1;
    if (data != ptr) memcpy(data, ptr, len);
    chunkqueue_mark_written(cq, len);
    return 0;
}
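/* note: chunkqueue_read_data() is the consuming counterpart to
 * chunkqueue_peek_data(): it fails unless exactly dlen bytes are available,
 * and on success marks those bytes as written (consumed) in cq */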


buffer *
chunkqueue_read_squash (chunkqueue * const restrict cq, log_error_st * const restrict errh)
{
    /* read and replace chunkqueue contents with single MEM_CHUNK.
     * cq->bytes_out is not modified */

    off_t cqlen = chunkqueue_length(cq);
    if (cqlen >= UINT32_MAX) return NULL;

    if (cq->first && NULL == cq->first->next && cq->first->type == MEM_CHUNK)
        return cq->first->mem;

    chunk * const c = chunk_acquire((uint32_t)cqlen+1);
    char *data = c->mem->ptr;
    uint32_t dlen = (uint32_t)cqlen;
    int rc = chunkqueue_peek_data(cq, &data, &dlen, errh);
    if (rc < 0) {
        chunk_release(c);
        return NULL;
    }
    buffer_truncate(c->mem, dlen);

    chunkqueue_release_chunks(cq);
    chunkqueue_append_chunk(cq, c);
    return c->mem;
}
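/* note: on success the returned buffer belongs to the single MEM_CHUNK now in
 * cq, so it remains valid only as long as that chunk remains in cq (e.g. until
 * bytes are consumed from cq or cq is released) */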


#ifdef HAVE_MMAP

const chunk_file_view *
chunkqueue_chunk_file_viewadj (chunk * const c, off_t n, log_error_st * restrict errh)
{
    /*assert(c->type == FILE_CHUNK);*/
    chunk_file_view * restrict cfv = c->file.view;

    if (NULL == cfv) {
        /* XXX: might add global config check to enable/disable mmap use here */
        cfv = c->file.view = chunk_file_view_init();
    }
    else if (MAP_FAILED != cfv->mptr)
        munmap(cfv->mptr, (size_t)cfv->mlen);
        /*cfv->mptr = MAP_FAILED;*/ /*(assigned below)*/

    if (c->file.fd < 0 && 0 != chunk_open_file_chunk(c, errh)) {
        c->file.view = chunk_file_view_failed(cfv);
        return NULL;
    }

    cfv->foff = mmap_align_offset(c->offset);

    if (0 != n) {
        cfv->mlen = c->offset - cfv->foff + n;
      #if !(defined(_LP64) || defined(__LP64__) || defined(_WIN64))
        /*(consider 512k blocks if this func is used more generically)*/
        const off_t mmap_chunk_size = 8 * 1024 * 1024;
        if (cfv->mlen > mmap_chunk_size)
            cfv->mlen = mmap_chunk_size;
      #endif
    }
    else
        cfv->mlen = MMAP_CHUNK_SIZE;
    /* XXX: 64-bit might use a larger min block size, or even map entire file */
    if (cfv->mlen < MMAP_CHUNK_SIZE)
        cfv->mlen = MMAP_CHUNK_SIZE;
    if (cfv->mlen > c->file.length - cfv->foff)
        cfv->mlen = c->file.length - cfv->foff;

    cfv->mptr = mmap(NULL, (size_t)cfv->mlen, PROT_READ,
                     c->file.is_temp ? MAP_PRIVATE : chunk_mmap_flags,
                     c->file.fd, cfv->foff);

    if (__builtin_expect( (MAP_FAILED == cfv->mptr), 0)) {
        if (__builtin_expect( (errno == EINVAL), 0)) {
            chunk_mmap_flags &= ~MAP_SHARED;
            chunk_mmap_flags |= MAP_PRIVATE;
            cfv->mptr = mmap(NULL, (size_t)cfv->mlen, PROT_READ,
                             MAP_PRIVATE, c->file.fd, cfv->foff);
        }
        if (__builtin_expect( (MAP_FAILED == cfv->mptr), 0)) {
            c->file.view = chunk_file_view_failed(cfv);
            return NULL;
        }
    }

  #if 0 /*(review callers before changing; some expect open file)*/
    /* close() fd as soon as file is fully mmap()'d, rather than when done
     * with chunk (possibly worthwhile to keep active fd count lower)
     * (probably only reasonable if entire file is mapped) */
    if (c->file.is_temp && !c->file.refchg) {
        close(c->file.fd);
        c->file.fd = -1;
    }
  #endif

 #if 0
    /* disable madvise unless we find common cases where there is a benefit
     * (??? madvise for full mmap length or only for original requested n ???)
     * (??? might add a flags param to this func to indicate madvise pref ???)
     * (??? might experiment with Linux mmap flags MAP_POPULATE|MAP_PRIVATE)
     * (??? might experiment with madvise MADV_POPULATE_READ (since Linux 5.14))
     * note: caller might be in a better position to know if starting an mmap
     * which will be flushed in its entirety, and perform madvise at that
     * point, perhaps with MADV_SEQUENTIAL */
  #ifdef HAVE_MADVISE
    if (cfv->mlen > 65536) /*(skip syscall if size <= 64KB)*/
        (void)madvise(cfv->mptr, (size_t)cfv->mlen, MADV_WILLNEED);
  #endif
 #endif

    return cfv;
}
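#if 0
/* illustrative sketch (hypothetical consumer; not part of this file): typical
 * use of a file view, mirroring chunkqueue_write_chunk_file() above.
 * chunkqueue_chunk_file_view() is expected to reuse an existing view covering
 * the requested range, or else call chunkqueue_chunk_file_viewadj() to (re)map */
const chunk_file_view * const cfv = chunkqueue_chunk_file_view(c, len, errh);
if (NULL != cfv) {
    const off_t avail = chunk_file_view_dlen(cfv, c->offset);
    consume_bytes(chunk_file_view_dptr(cfv, c->offset),   /*(hypothetical fn)*/
                  (size_t)(len <= avail ? len : avail));
}
#endif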

#endif /* HAVE_MMAP */