1 #include "network_backends.h"
2
3 #ifdef USE_LINUX_SENDFILE
4
5 #include "network.h"
6 #include "fdevent.h"
7 #include "log.h"
8 #include "stat_cache.h"
9
10 #include <sys/types.h>
11 #include <sys/socket.h>
12 #include <sys/stat.h>
13 #include <sys/time.h>
14 #include <sys/resource.h>
15
16 #include <netinet/in.h>
17 #include <netinet/tcp.h>
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <netdb.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <fcntl.h>
26
27 /* on linux 2.4.29 + debian/ubuntu we have crashes if this is enabled */
28 #undef HAVE_POSIX_FADVISE
29
/**
 * Write as much of a chunk queue as possible to a descriptor, using
 * writev() for runs of memory chunks and Linux sendfile() for file chunks.
 *
 * Chunks are not unlinked from the queue here; only their offsets are
 * advanced and cq->bytes_out is increased by the number of bytes written.
 *
 * @param srv        server context, used for logging and the stat cache
 * @param con        connection, passed through to stat_cache_get_entry()
 * @param fd         descriptor the data is written to
 * @param cq         chunk queue to send from (walked starting at cq->first)
 * @param max_bytes  upper bound on the number of bytes to write in this call
 *
 * @return  0 if the call completed without error (including "would block"
 *            and partial writes),
 *         -1 on a write/open/stat error, an unknown chunk type, or when the
 *            file turned out to be shorter than the current send offset,
 *         -2 when the peer went away (EPIPE/ECONNRESET) or sendfile() wrote
 *            nothing although data was pending.
 */
int network_write_chunkqueue_linuxsendfile(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) {
	chunk *c;

	for (c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) {
		int chunk_finished = 0;

		switch (c->type) {
		case MEM_CHUNK: {
			char *offset;
			off_t toSend;
			ssize_t r;

			size_t num_chunks, i;
			struct iovec chunks[UIO_MAXIOV];
			chunk *tc;
			size_t num_bytes = 0;

			/* build writev list
			 *
			 * 1. limit: num_chunks < UIO_MAXIOV
			 * 2. limit: num_bytes < max_bytes
			 */

			/* count the consecutive memory chunks starting at c */
			for (num_chunks = 0, tc = c;
			     tc && tc->type == MEM_CHUNK && num_chunks < UIO_MAXIOV;
			     tc = tc->next, num_chunks++) {
				/* counting only */
			}

			for (tc = c, i = 0; i < num_chunks; tc = tc->next, i++) {
				if (tc->mem->used == 0) {
					/* empty buffer: keep a zero-length slot so that the
					 * iovec index stays aligned with the chunk list */
					chunks[i].iov_base = tc->mem->ptr;
					chunks[i].iov_len  = 0;
				} else {
					offset = tc->mem->ptr + tc->offset;
					/* NOTE(review): the "- 1" presumably skips a trailing
					 * NUL that is counted in mem->used - confirm against
					 * the buffer implementation */
					toSend = tc->mem->used - 1 - tc->offset;

					chunks[i].iov_base = offset;

					/* protect the return value of writev():
					 * never hand over more than max_bytes in total */
					if (toSend > max_bytes ||
					    (off_t) num_bytes + toSend > max_bytes) {
						chunks[i].iov_len = max_bytes - num_bytes;

						/* this (truncated) entry is the last one we send */
						num_chunks = i + 1;
						break;
					} else {
						chunks[i].iov_len = toSend;
					}

					num_bytes += toSend;
				}
			}

			if ((r = writev(fd, chunks, num_chunks)) < 0) {
				switch (errno) {
				case EAGAIN:
				case EINTR:
					/* nothing written this time; not an error */
					r = 0;
					break;
				case EPIPE:
				case ECONNRESET:
					return -2;
				default:
					log_error_write(srv, __FILE__, __LINE__, "ssd",
							"writev failed:", strerror(errno), fd);

					return -1;
				}
			}

			/* check which chunks have been written */
			cq->bytes_out += r;
			max_bytes -= r;

			for (i = 0, tc = c; i < num_chunks; i++, tc = tc->next) {
				if (r >= (ssize_t)chunks[i].iov_len) {
					/* written */
					r -= chunks[i].iov_len;
					tc->offset += chunks[i].iov_len;

					if (chunk_finished) {
						/* skip the chunks from further touches */
						c = c->next;
					} else {
						/* the first finished chunk is skipped by the
						 * c = c->next of the outer for() */
						chunk_finished = 1;
					}
				} else {
					/* partially written */

					tc->offset += r;
					chunk_finished = 0;

					break;
				}
			}

			break;
		}
		case FILE_CHUNK: {
			ssize_t r;
			off_t offset;
			off_t toSend;
			stat_cache_entry *sce = NULL;

			offset = c->file.start + c->offset;
			toSend = c->file.length - c->offset;
			if (toSend > max_bytes) toSend = max_bytes;

			if (0 == toSend) {
				/* max_bytes is > 0 here, so the chunk itself has nothing
				 * left to send. Calling sendfile() with a zero count would
				 * return 0 and be misreported below as a shrunken file or
				 * a remote close, so treat the chunk as finished instead. */
				chunk_finished = 1;

				if (c->file.fd != -1) {
					close(c->file.fd);
					c->file.fd = -1;
				}

				break;
			}

			/* open file if not already opened */
			if (-1 == c->file.fd) {
				if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) {
					log_error_write(srv, __FILE__, __LINE__, "ss", "open failed: ", strerror(errno));

					return -1;
				}
#ifdef FD_CLOEXEC
				fcntl(c->file.fd, F_SETFD, FD_CLOEXEC);
#endif
#ifdef HAVE_POSIX_FADVISE
				/* tell the kernel that we want to stream the file */
				if (-1 == posix_fadvise(c->file.fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
					if (ENOSYS != errno) {
						log_error_write(srv, __FILE__, __LINE__, "ssd",
							"posix_fadvise failed:", strerror(errno), c->file.fd);
					}
				}
#endif
			}

			if (-1 == (r = sendfile(fd, c->file.fd, &offset, toSend))) {
				switch (errno) {
				case EAGAIN:
				case EINTR:
					/* ok, we can't send more, let's try again later */
					r = 0;
					break;
				case EPIPE:
				case ECONNRESET:
					return -2;
				default:
					log_error_write(srv, __FILE__, __LINE__, "ssd",
							"sendfile failed:", strerror(errno), fd);
					return -1;
				}
			} else if (r == 0) {
				/* keep errno as sendfile() left it; the stat cache lookup
				 * below may overwrite it */
				int oerrno = errno;
				/* We got an event to write but we wrote nothing
				 *
				 * - the file shrank -> error
				 * - the remote side closed in between -> remote-close */

				if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) {
					/* file is gone ? */
					return -1;
				}

				if (offset > sce->st.st_size) {
					/* file shrank, close the connection */
					errno = oerrno;

					return -1;
				}

				errno = oerrno;
				return -2;
			}

#ifdef HAVE_POSIX_FADVISE
#if 0
#define K * 1024
#define M * 1024 K
#define READ_AHEAD 4 M
			/* check if we need a new chunk */
			if ((c->offset & ~(READ_AHEAD - 1)) != ((c->offset + r) & ~(READ_AHEAD - 1))) {
				/* tell the kernel that we want to stream the file */
				if (-1 == posix_fadvise(c->file.fd, (c->offset + r) & ~(READ_AHEAD - 1), READ_AHEAD, POSIX_FADV_NOREUSE)) {
					log_error_write(srv, __FILE__, __LINE__, "ssd",
						"posix_fadvise failed:", strerror(errno), c->file.fd);
				}
			}
#endif
#endif

			c->offset += r;
			cq->bytes_out += r;
			max_bytes -= r;

			if (c->offset == c->file.length) {
				chunk_finished = 1;

				/* chunk_free() / chunk_reset() will clean up for us,
				 * but it is ok to be faster :) */

				if (c->file.fd != -1) {
					close(c->file.fd);
					c->file.fd = -1;
				}
			}

			break;
		}
		default:

			/* "d" expects an int (as with fd above): log the chunk type,
			 * not the chunk pointer */
			log_error_write(srv, __FILE__, __LINE__, "ds", (int) c->type, "type not known");

			return -1;
		}

		if (!chunk_finished) {
			/* not finished yet: stop here, later chunks must not be
			 * sent before this one is complete */

			break;
		}
	}

	return 0;
}
245
246 #endif
/* Disabled leftover (compiled out by the surrounding "#if 0"): it would
 * assign network_linuxsendfile_write_chunkset to p->write. Neither `p` nor
 * that function is defined in the visible part of this file, so this block
 * cannot be enabled as-is. */
247 #if 0
248 network_linuxsendfile_init(void) {
249 p->write = network_linuxsendfile_write_chunkset;
250 }
251 #endif
252