1*76404edcSAsim Jamshed #include "network_backends.h"
2*76404edcSAsim Jamshed
3*76404edcSAsim Jamshed #ifdef USE_LINUX_SENDFILE
4*76404edcSAsim Jamshed
5*76404edcSAsim Jamshed #include "network.h"
6*76404edcSAsim Jamshed #include "fdevent.h"
7*76404edcSAsim Jamshed #include "log.h"
8*76404edcSAsim Jamshed #include "stat_cache.h"
9*76404edcSAsim Jamshed
10*76404edcSAsim Jamshed #include <sys/types.h>
11*76404edcSAsim Jamshed #include <sys/socket.h>
12*76404edcSAsim Jamshed #include <sys/stat.h>
13*76404edcSAsim Jamshed #include <sys/time.h>
14*76404edcSAsim Jamshed #include <sys/resource.h>
15*76404edcSAsim Jamshed
16*76404edcSAsim Jamshed #include <netinet/in.h>
17*76404edcSAsim Jamshed #include <netinet/tcp.h>
18*76404edcSAsim Jamshed
19*76404edcSAsim Jamshed #include <errno.h>
20*76404edcSAsim Jamshed #include <fcntl.h>
21*76404edcSAsim Jamshed #include <unistd.h>
22*76404edcSAsim Jamshed #include <netdb.h>
23*76404edcSAsim Jamshed #include <string.h>
24*76404edcSAsim Jamshed #include <stdlib.h>
25*76404edcSAsim Jamshed #include <fcntl.h>
26*76404edcSAsim Jamshed
27*76404edcSAsim Jamshed /* on linux 2.4.29 + debian/ubuntu we have crashes if this is enabled */
28*76404edcSAsim Jamshed #undef HAVE_POSIX_FADVISE
29*76404edcSAsim Jamshed
/**
 * Write as much of a chunkqueue as possible to fd, using writev() for
 * in-memory chunks and Linux sendfile() for file-backed chunks.
 *
 * The queue is walked from cq->first. The walk stops when max_bytes is
 * used up or at the first chunk that could not be written completely.
 * For every byte written, the owning chunk's offset, cq->bytes_out and
 * the remaining max_bytes budget are updated; chunks are not removed
 * from the queue here.
 *
 * @param srv        server context, used for logging and the stat cache
 * @param con        connection, only passed on to stat_cache_get_entry()
 * @param fd         descriptor handed to writev()/sendfile() as the output
 * @param cq         chunkqueue to send from
 * @param max_bytes  upper bound on the number of bytes written by this call
 *
 * @return  0 on success (this includes partial writes and EAGAIN/EINTR),
 *         -1 on a local error (open/writev/sendfile failure, file gone or
 *            shrunk, unknown chunk type),
 *         -2 when the peer went away (EPIPE/ECONNRESET, or sendfile() wrote
 *            nothing although the file did not shrink).
 */
int network_write_chunkqueue_linuxsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) {
	chunk *c;

	for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) {
		/* set once the chunk `c` points at has been written completely */
		int chunk_finished = 0;

		switch(c->type) {
		case MEM_CHUNK: {
			char * offset;
			off_t toSend;
			ssize_t r;

			size_t num_chunks, i;
			struct iovec chunks[UIO_MAXIOV];
			chunk *tc;
			size_t num_bytes = 0;

			/* build writev list
			 *
			 * 1. limit: num_chunks < UIO_MAXIOV
			 * 2. limit: num_bytes < max_bytes
			 */

			/* count the run of consecutive MEM_CHUNKs starting at c */
			for (num_chunks = 0, tc = c;
			     tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV;
			     tc = tc->next, num_chunks++);

			for (tc = c, i = 0; i < num_chunks; tc = tc->next, i++) {
				if (tc->mem->used == 0) {
					/* empty buffer: keep the slot so iovec indices match the chunk run */
					chunks[i].iov_base = tc->mem->ptr;
					chunks[i].iov_len  = 0;
				} else {
					offset = tc->mem->ptr + tc->offset;
					/* the last byte of `used` is not sent (hence the -1) */
					toSend = tc->mem->used - 1 - tc->offset;

					chunks[i].iov_base = offset;

					/* protect the return value of writev() */
					if (toSend > max_bytes ||
					    (off_t) num_bytes + toSend > max_bytes) {
						/* budget exhausted: truncate this entry and end the list here */
						chunks[i].iov_len = max_bytes - num_bytes;

						num_chunks = i + 1;
						break;
					} else {
						chunks[i].iov_len = toSend;
					}

					num_bytes += toSend;
				}
			}

			if ((r = writev(fd, chunks, num_chunks)) < 0) {
				switch (errno) {
				case EAGAIN:
				case EINTR:
					/* nothing written this time; not an error */
					r = 0;
					break;
				case EPIPE:
				case ECONNRESET:
					return -2;
				default:
					log_error_write(srv, __FILE__, __LINE__, "ssd",
							"writev failed:", strerror(errno), fd);

					return -1;
				}
			}

			/* check which chunks have been written */
			cq->bytes_out += r;
			max_bytes -= r;

			for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) {
				if (r >= (ssize_t)chunks[i].iov_len) {
					/* written */
					r -= chunks[i].iov_len;
					tc->offset += chunks[i].iov_len;

					if (chunk_finished) {
						/* skip the chunks from further touches */
						c = c->next;
					} else {
						/* the first completed chunk is skipped by the c = c->next
						 * of the outer for(); only later ones advance c here */
						chunk_finished = 1;
					}
				} else {
					/* partially written */

					tc->offset += r;
					chunk_finished = 0;

					break;
				}
			}

			break;
		}
		case FILE_CHUNK: {
			ssize_t r;
			off_t offset;
			off_t toSend;
			stat_cache_entry *sce = NULL;

			offset = c->file.start + c->offset;
			toSend = c->file.length - c->offset;

			if (0 == toSend) {
				/* Nothing left in this chunk (empty or already sent).
				 * Calling sendfile() with a count of 0 would return 0 and be
				 * misreported below as a remote close, so just mark the chunk
				 * as finished and move on. */
				if (c->file.fd != -1) {
					close(c->file.fd);
					c->file.fd = -1;
				}

				chunk_finished = 1;
				break;
			}

			if (toSend > max_bytes) toSend = max_bytes;

			/* open file if not already opened */
			if (-1 == c->file.fd) {
				if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) {
					log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno));

					return -1;
				}
#ifdef FD_CLOEXEC
				/* best effort: the result is intentionally not checked */
				fcntl(c->file.fd, F_SETFD, FD_CLOEXEC);
#endif
#ifdef HAVE_POSIX_FADVISE
				{
					/* tell the kernel that we want to stream the file;
					 * posix_fadvise() returns the error number itself and
					 * does not set errno */
					int adv_err = posix_fadvise(c->file.fd, 0, 0, POSIX_FADV_SEQUENTIAL);
					if (0 != adv_err && ENOSYS != adv_err) {
						log_error_write(srv, __FILE__, __LINE__, "ssd",
							"posix_fadvise failed:", strerror(adv_err), c->file.fd);
					}
				}
#endif
			}

			if (-1 == (r = sendfile(fd, c->file.fd, &offset, toSend))) {
				switch (errno) {
				case EAGAIN:
				case EINTR:
					/* ok, we can't send more, let's try later again */
					r = 0;
					break;
				case EPIPE:
				case ECONNRESET:
					return -2;
				default:
					log_error_write(srv, __FILE__, __LINE__, "ssd",
							"sendfile failed:", strerror(errno), fd);
					return -1;
				}
			} else if (r == 0) {
				/* keep errno intact across the stat-cache lookup */
				int oerrno = errno;
				/* We got an event to write but we wrote nothing
				 *
				 * - the file shrank -> error
				 * - the remote side closed in between -> remote-close */

				if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) {
					/* file is gone ? */
					return -1;
				}

				if (offset > sce->st.st_size) {
					/* file shrank, close the connection */
					errno = oerrno;

					return -1;
				}

				errno = oerrno;
				return -2;
			}

#ifdef HAVE_POSIX_FADVISE
#if 0
#define K * 1024
#define M * 1024 K
#define READ_AHEAD 4 M
			/* check if we need a new chunk */
			if ((c->offset & ~(READ_AHEAD - 1)) != ((c->offset + r) & ~(READ_AHEAD - 1))) {
				/* tell the kernel that we want to stream the file;
				 * the error number is the return value, not errno */
				int adv_err = posix_fadvise(c->file.fd, (c->offset + r) & ~(READ_AHEAD - 1), READ_AHEAD, POSIX_FADV_NOREUSE);
				if (0 != adv_err) {
					log_error_write(srv, __FILE__, __LINE__, "ssd",
						"posix_fadvise failed:", strerror(adv_err), c->file.fd);
				}
			}
#endif
#endif

			c->offset += r;
			cq->bytes_out += r;
			max_bytes -= r;

			if (c->offset == c->file.length) {
				chunk_finished = 1;

				/* chunk_free() / chunk_reset() will clean up for us, but it is ok to be faster :) */

				if (c->file.fd != -1) {
					close(c->file.fd);
					c->file.fd = -1;
				}
			}

			break;
		}
		default:

			/* "d" takes an int (see the fd arguments above), so log the
			 * unknown type value rather than the chunk pointer */
			log_error_write(srv, __FILE__, __LINE__, "ds", (int) c->type, "type not known");

			return -1;
		}

		if (!chunk_finished) {
			/* not finished yet */

			break;
		}
	}

	return 0;
}
245*76404edcSAsim Jamshed
246*76404edcSAsim Jamshed #endif
/* Disabled leftover (guarded by #if 0, so never compiled): an init stub that
 * would store network_linuxsendfile_write_chunkset in p->write. Neither `p`
 * nor that function is defined in the visible part of this file. */
247*76404edcSAsim Jamshed #if 0
248*76404edcSAsim Jamshed network_linuxsendfile_init(void) {
249*76404edcSAsim Jamshed p->write = network_linuxsendfile_write_chunkset;
250*76404edcSAsim Jamshed }
251*76404edcSAsim Jamshed #endif
252