/*
 * Copyright 2000-2007 Niels Provos <[email protected]>
 * Copyright 2007-2012 Niels Provos, Nick Mathewson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27ec347b92SNick Mathewson #include "event2/event-config.h"
280915ca0aSKevin Bowling #include "evconfig-private.h"
293e41f17aSNiels Provos
3068120d9bSNick Mathewson #ifdef EVENT__HAVE_EPOLL
3176d4c929SRoss Lagerwall
323e41f17aSNiels Provos #include <stdint.h>
333e41f17aSNiels Provos #include <sys/types.h>
34b0b72eb0SNiels Provos #include <sys/resource.h>
3568120d9bSNick Mathewson #ifdef EVENT__HAVE_SYS_TIME_H
363e41f17aSNiels Provos #include <sys/time.h>
373e41f17aSNiels Provos #endif
383e41f17aSNiels Provos #include <sys/queue.h>
393e41f17aSNiels Provos #include <sys/epoll.h>
403e41f17aSNiels Provos #include <signal.h>
41850c3ff2SChristopher Davis #include <limits.h>
423e41f17aSNiels Provos #include <stdio.h>
433e41f17aSNiels Provos #include <stdlib.h>
443e41f17aSNiels Provos #include <string.h>
453e41f17aSNiels Provos #include <unistd.h>
463e41f17aSNiels Provos #include <errno.h>
4768120d9bSNick Mathewson #ifdef EVENT__HAVE_FCNTL_H
486df2ede5SNiels Provos #include <fcntl.h>
496df2ede5SNiels Provos #endif
5026c75828SNick Mathewson #ifdef EVENT__HAVE_SYS_TIMERFD_H
5126c75828SNick Mathewson #include <sys/timerfd.h>
5226c75828SNick Mathewson #endif
533e41f17aSNiels Provos
5441b7cbc3SNiels Provos #include "event-internal.h"
55169321c9SNick Mathewson #include "evsignal-internal.h"
566b22e74aSNick Mathewson #include "event2/thread.h"
576b22e74aSNick Mathewson #include "evthread-internal.h"
58169321c9SNick Mathewson #include "log-internal.h"
59169321c9SNick Mathewson #include "evmap-internal.h"
60c8c6a897SNick Mathewson #include "changelist-internal.h"
6171bca50fSNick Mathewson #include "time-internal.h"
62ff266332SJoakim Soderberg
/* Since Linux 2.6.17, epoll is able to report a peer half-closed connection
   using the special EPOLLRDHUP flag on a read event.

   When the system headers do not provide EPOLLRDHUP, define it to 0 so that
   it can still be used in bit masks, and do not advertise early-close
   support.
 */
#if !defined(EPOLLRDHUP)
#define EPOLLRDHUP 0
#define EARLY_CLOSE_IF_HAVE_RDHUP 0
#else
#define EARLY_CLOSE_IF_HAVE_RDHUP EV_FEATURE_EARLY_CLOSE
#endif
72ff266332SJoakim Soderberg
7343ffcf69SNick Mathewson #include "epolltable-internal.h"
743e41f17aSNiels Provos
#if defined(EVENT__HAVE_SYS_TIMERFD_H) && \
	defined(EVENT__HAVE_TIMERFD_CREATE) && \
	defined(HAVE_POSIX_MONOTONIC) && defined(TFD_NONBLOCK) && \
	defined(TFD_CLOEXEC)
/* Note that we only use timerfd if TFD_NONBLOCK and TFD_CLOEXEC are available
   and working.  This means that we can't support it on 2.6.25 (where timerfd
   was introduced) or 2.6.26, since 2.6.27 introduced those flags.
 */
#define USING_TIMERFD
#endif
8526c75828SNick Mathewson
/* Backend-private state for one event_base using epoll. */
struct epollop {
	/* Buffer passed to epoll_wait() to receive ready events. */
	struct epoll_event *events;
	/* Number of entries allocated in `events`. */
	int nevents;
	/* File descriptor of the epoll instance. */
	int epfd;
#ifdef USING_TIMERFD
	/* timerfd registered on `epfd` for precise timeouts, or -1 when
	 * no timerfd is in use. */
	int timerfd;
#endif
};
943e41f17aSNiels Provos
/* Backend entry points; defined later in this file. */
static void *epoll_init(struct event_base *);
static int epoll_dispatch(struct event_base *, struct timeval *);
static void epoll_dealloc(struct event_base *);
983e41f17aSNiels Provos
999531763aSNick Mathewson static const struct eventop epollops_changelist = {
1009531763aSNick Mathewson "epoll (with changelist)",
1013e41f17aSNiels Provos epoll_init,
1028ac3c4c2SNick Mathewson event_changelist_add_,
1038ac3c4c2SNick Mathewson event_changelist_del_,
1042e8051f5SNiels Provos epoll_dispatch,
10588897852SNiels Provos epoll_dealloc,
10605965921SNick Mathewson 1, /* need reinit */
1073908a5e3SNick Mathewson EV_FEATURE_ET|EV_FEATURE_O1| EARLY_CLOSE_IF_HAVE_RDHUP,
108c8c6a897SNick Mathewson EVENT_CHANGELIST_FDINFO_SIZE
1093e41f17aSNiels Provos };
1103e41f17aSNiels Provos
1119531763aSNick Mathewson
1129531763aSNick Mathewson static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
1139531763aSNick Mathewson short old, short events, void *p);
1149531763aSNick Mathewson static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
1159531763aSNick Mathewson short old, short events, void *p);
1169531763aSNick Mathewson
1179531763aSNick Mathewson const struct eventop epollops = {
1189531763aSNick Mathewson "epoll",
1199531763aSNick Mathewson epoll_init,
1209531763aSNick Mathewson epoll_nochangelist_add,
1219531763aSNick Mathewson epoll_nochangelist_del,
1229531763aSNick Mathewson epoll_dispatch,
1239531763aSNick Mathewson epoll_dealloc,
1249531763aSNick Mathewson 1, /* need reinit */
125b1b69ac7SDiego Giagio EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_EARLY_CLOSE,
1269531763aSNick Mathewson 0
1279531763aSNick Mathewson };
1289531763aSNick Mathewson
/* Initial size of the epoll_wait() result buffer, and the size at which
 * epoll_dispatch() stops doubling it. */
#define INITIAL_NEVENT 32
#define MAX_NEVENT 4096

/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
139f9f4d4feSNick Mathewson
140ca42671aSNiels Provos static void *
epoll_init(struct event_base * base)14141b7cbc3SNiels Provos epoll_init(struct event_base *base)
1423e41f17aSNiels Provos {
143bac906c7SNick Mathewson int epfd = -1;
1443ba224dbSNiels Provos struct epollop *epollop;
1453e41f17aSNiels Provos
14668120d9bSNick Mathewson #ifdef EVENT__HAVE_EPOLL_CREATE1
147bac906c7SNick Mathewson /* First, try the shiny new epoll_create1 interface, if we have it. */
148bac906c7SNick Mathewson epfd = epoll_create1(EPOLL_CLOEXEC);
149bac906c7SNick Mathewson #endif
150bac906c7SNick Mathewson if (epfd == -1) {
151bac906c7SNick Mathewson /* Initialize the kernel queue using the old interface. (The
152bac906c7SNick Mathewson size field is ignored since 2.6.8.) */
15385255a63SNick Mathewson if ((epfd = epoll_create(32000)) == -1) {
15444ceb945SNick Mathewson if (errno != ENOSYS)
155fbdaf3abSNiels Provos event_warn("epoll_create");
1563e41f17aSNiels Provos return (NULL);
1573e41f17aSNiels Provos }
158d0939d2bSJardel Weyrich evutil_make_socket_closeonexec(epfd);
159bac906c7SNick Mathewson }
1606df2ede5SNiels Provos
1611aee7183SJamie Iles if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
1621aee7183SJamie Iles close(epfd);
1633ba224dbSNiels Provos return (NULL);
1641aee7183SJamie Iles }
1653ba224dbSNiels Provos
1663ba224dbSNiels Provos epollop->epfd = epfd;
1673e41f17aSNiels Provos
168e3fd294aSNick Mathewson /* Initialize fields */
16918a8cfacSNick Mathewson epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
1703ba224dbSNiels Provos if (epollop->events == NULL) {
17149868b61SNick Mathewson mm_free(epollop);
1721aee7183SJamie Iles close(epfd);
1733e41f17aSNiels Provos return (NULL);
1743e41f17aSNiels Provos }
17585255a63SNick Mathewson epollop->nevents = INITIAL_NEVENT;
1763e41f17aSNiels Provos
1779531763aSNick Mathewson if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
1789531763aSNick Mathewson ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
17926c75828SNick Mathewson evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL)) {
18026c75828SNick Mathewson
1819531763aSNick Mathewson base->evsel = &epollops_changelist;
18226c75828SNick Mathewson }
18326c75828SNick Mathewson
18426c75828SNick Mathewson #ifdef USING_TIMERFD
18526c75828SNick Mathewson /*
18626c75828SNick Mathewson The epoll interface ordinarily gives us one-millisecond precision,
18726c75828SNick Mathewson so on Linux it makes perfect sense to use the CLOCK_MONOTONIC_COARSE
18826c75828SNick Mathewson timer. But when the user has set the new PRECISE_TIMER flag for an
18926c75828SNick Mathewson event_base, we can try to use timerfd to give them finer granularity.
19026c75828SNick Mathewson */
19126c75828SNick Mathewson if ((base->flags & EVENT_BASE_FLAG_PRECISE_TIMER) &&
19226c75828SNick Mathewson base->monotonic_timer.monotonic_clock == CLOCK_MONOTONIC) {
19326c75828SNick Mathewson int fd;
19426c75828SNick Mathewson fd = epollop->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
19526c75828SNick Mathewson if (epollop->timerfd >= 0) {
19626c75828SNick Mathewson struct epoll_event epev;
1971258614fSPatrick Pelletier memset(&epev, 0, sizeof(epev));
19826c75828SNick Mathewson epev.data.fd = epollop->timerfd;
19926c75828SNick Mathewson epev.events = EPOLLIN;
20026c75828SNick Mathewson if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, fd, &epev) < 0) {
20126c75828SNick Mathewson event_warn("epoll_ctl(timerfd)");
20226c75828SNick Mathewson close(fd);
20326c75828SNick Mathewson epollop->timerfd = -1;
20426c75828SNick Mathewson }
20526c75828SNick Mathewson } else {
2061aaf9f01SDave Hart if (errno != EINVAL && errno != ENOSYS) {
2071aaf9f01SDave Hart /* These errors probably mean that we were
2081aaf9f01SDave Hart * compiled with timerfd/TFD_* support, but
2091aaf9f01SDave Hart * we're running on a kernel that lacks those.
2101aaf9f01SDave Hart */
21126c75828SNick Mathewson event_warn("timerfd_create");
21226c75828SNick Mathewson }
2131aaf9f01SDave Hart epollop->timerfd = -1;
2141aaf9f01SDave Hart }
21526c75828SNick Mathewson } else {
21626c75828SNick Mathewson epollop->timerfd = -1;
21726c75828SNick Mathewson }
21826c75828SNick Mathewson #endif
2199531763aSNick Mathewson
2208ac3c4c2SNick Mathewson evsig_init_(base);
2213ba224dbSNiels Provos
2223ba224dbSNiels Provos return (epollop);
2233e41f17aSNiels Provos }
2243e41f17aSNiels Provos
2259e725f72SNick Mathewson static const char *
change_to_string(int change)2269e725f72SNick Mathewson change_to_string(int change)
2279e725f72SNick Mathewson {
2289e725f72SNick Mathewson change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
2299e725f72SNick Mathewson if (change == EV_CHANGE_ADD) {
2309e725f72SNick Mathewson return "add";
2319e725f72SNick Mathewson } else if (change == EV_CHANGE_DEL) {
2329e725f72SNick Mathewson return "del";
2339e725f72SNick Mathewson } else if (change == 0) {
2349e725f72SNick Mathewson return "none";
2359e725f72SNick Mathewson } else {
2369e725f72SNick Mathewson return "???";
2379e725f72SNick Mathewson }
2389e725f72SNick Mathewson }
2399e725f72SNick Mathewson
/*
 * Return the name of an epoll_ctl() operation code ("ADD", "DEL" or "MOD"),
 * or "???" for any other value.  Used for log messages only.
 */
static const char *
epoll_op_to_string(int op)
{
	return op == EPOLL_CTL_ADD?"ADD":
	    op == EPOLL_CTL_DEL?"DEL":
	    op == EPOLL_CTL_MOD?"MOD":
	    "???";
}
2489e725f72SNick Mathewson
/*
 * Expands to a printf-style format string followed by its arguments, for
 * logging the outcome of an epoll_ctl() call.
 *
 *   op      epoll_ctl() operation code
 *   events  event mask that was passed to the kernel (printed with %d)
 *   ch      pointer to the struct event_change being applied
 *   status  string literal spliced into the format string
 */
#define PRINT_CHANGES(op, events, ch, status)  \
	"Epoll %s(%d) on fd %d " status ". "       \
	"Old events were %d; "                     \
	"read change was %d (%s); "                \
	"write change was %d (%s); "               \
	"close change was %d (%s)",                \
	epoll_op_to_string(op),                    \
	(int)(events),                             \
	(ch)->fd,                                  \
	(ch)->old_events,                          \
	(ch)->read_change,                         \
	change_to_string((ch)->read_change),       \
	(ch)->write_change,                        \
	change_to_string((ch)->write_change),      \
	(ch)->close_change,                        \
	change_to_string((ch)->close_change)
265a1b142bdSAzat Khuzhin
266d80c1c36SNick Mathewson static int
epoll_apply_one_change(struct event_base * base,struct epollop * epollop,const struct event_change * ch)2679531763aSNick Mathewson epoll_apply_one_change(struct event_base *base,
2689531763aSNick Mathewson struct epollop *epollop,
2699531763aSNick Mathewson const struct event_change *ch)
270c8c6a897SNick Mathewson {
271c8c6a897SNick Mathewson struct epoll_event epev;
2729531763aSNick Mathewson int op, events = 0;
2738c83eb69SNick Mathewson int idx;
274c8c6a897SNick Mathewson
27543ffcf69SNick Mathewson idx = EPOLL_OP_TABLE_INDEX(ch);
27643ffcf69SNick Mathewson op = epoll_op_table[idx].op;
27743ffcf69SNick Mathewson events = epoll_op_table[idx].events;
278c8c6a897SNick Mathewson
2798c83eb69SNick Mathewson if (!events) {
2808c83eb69SNick Mathewson EVUTIL_ASSERT(op == 0);
2819531763aSNick Mathewson return 0;
2828c83eb69SNick Mathewson }
2838c83eb69SNick Mathewson
284db2efdf5SAzat Khuzhin if ((ch->read_change|ch->write_change|ch->close_change) & EV_CHANGE_ET)
2858c83eb69SNick Mathewson events |= EPOLLET;
286c8c6a897SNick Mathewson
287c8c6a897SNick Mathewson memset(&epev, 0, sizeof(epev));
288c8c6a897SNick Mathewson epev.data.fd = ch->fd;
289c8c6a897SNick Mathewson epev.events = events;
2902d55a190SNick Mathewson if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == 0) {
291a1b142bdSAzat Khuzhin event_debug((PRINT_CHANGES(op, epev.events, ch, "okay")));
2922d55a190SNick Mathewson return 0;
2932d55a190SNick Mathewson }
2942d55a190SNick Mathewson
2952d55a190SNick Mathewson switch (op) {
2962d55a190SNick Mathewson case EPOLL_CTL_MOD:
2972d55a190SNick Mathewson if (errno == ENOENT) {
298c8c6a897SNick Mathewson /* If a MOD operation fails with ENOENT, the
299c8c6a897SNick Mathewson * fd was probably closed and re-opened. We
300c8c6a897SNick Mathewson * should retry the operation as an ADD.
301c8c6a897SNick Mathewson */
302c8c6a897SNick Mathewson if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
303ec2b05edSNick Mathewson event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
304ec2b05edSNick Mathewson (int)epev.events, ch->fd);
3059531763aSNick Mathewson return -1;
306c8c6a897SNick Mathewson } else {
307ec2b05edSNick Mathewson event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
308ec2b05edSNick Mathewson (int)epev.events,
309ec2b05edSNick Mathewson ch->fd));
3102d55a190SNick Mathewson return 0;
311c8c6a897SNick Mathewson }
3122d55a190SNick Mathewson }
3132d55a190SNick Mathewson break;
3142d55a190SNick Mathewson case EPOLL_CTL_ADD:
3152d55a190SNick Mathewson if (errno == EEXIST) {
316c281aba3SNick Mathewson /* If an ADD operation fails with EEXIST,
317c281aba3SNick Mathewson * either the operation was redundant (as with a
318c281aba3SNick Mathewson * precautionary add), or we ran into a fun
319c281aba3SNick Mathewson * kernel bug where using dup*() to duplicate the
320c281aba3SNick Mathewson * same file into the same fd gives you the same epitem
321c281aba3SNick Mathewson * rather than a fresh one. For the second case,
322c281aba3SNick Mathewson * we must retry with MOD. */
323c281aba3SNick Mathewson if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
324c281aba3SNick Mathewson event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
325c281aba3SNick Mathewson (int)epev.events, ch->fd);
3269531763aSNick Mathewson return -1;
327c281aba3SNick Mathewson } else {
328c281aba3SNick Mathewson event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
329ec2b05edSNick Mathewson (int)epev.events,
330c281aba3SNick Mathewson ch->fd));
3312d55a190SNick Mathewson return 0;
332c281aba3SNick Mathewson }
3332d55a190SNick Mathewson }
3342d55a190SNick Mathewson break;
3352d55a190SNick Mathewson case EPOLL_CTL_DEL:
336d4970d4eSNick Mathewson if (errno == ENOENT || errno == EBADF || errno == EPERM) {
337c8c6a897SNick Mathewson /* If a delete fails with one of these errors,
338c8c6a897SNick Mathewson * that's fine too: we closed the fd before we
339c8c6a897SNick Mathewson * got around to calling epoll_dispatch. */
340ec2b05edSNick Mathewson event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
341ec2b05edSNick Mathewson (int)epev.events,
342ec2b05edSNick Mathewson ch->fd,
343ec2b05edSNick Mathewson strerror(errno)));
3442d55a190SNick Mathewson return 0;
3452d55a190SNick Mathewson }
3462d55a190SNick Mathewson break;
3472d55a190SNick Mathewson default:
3482d55a190SNick Mathewson break;
3492d55a190SNick Mathewson }
3502d55a190SNick Mathewson
351a1b142bdSAzat Khuzhin event_warn(PRINT_CHANGES(op, epev.events, ch, "failed"));
3529531763aSNick Mathewson return -1;
353c8c6a897SNick Mathewson }
354c8c6a897SNick Mathewson
3559531763aSNick Mathewson static int
epoll_apply_changes(struct event_base * base)3569531763aSNick Mathewson epoll_apply_changes(struct event_base *base)
3579531763aSNick Mathewson {
3589531763aSNick Mathewson struct event_changelist *changelist = &base->changelist;
3599531763aSNick Mathewson struct epollop *epollop = base->evbase;
3609531763aSNick Mathewson struct event_change *ch;
3619531763aSNick Mathewson
3629531763aSNick Mathewson int r = 0;
3639531763aSNick Mathewson int i;
3649531763aSNick Mathewson
3659531763aSNick Mathewson for (i = 0; i < changelist->n_changes; ++i) {
3669531763aSNick Mathewson ch = &changelist->changes[i];
3679531763aSNick Mathewson if (epoll_apply_one_change(base, epollop, ch) < 0)
3689531763aSNick Mathewson r = -1;
3699531763aSNick Mathewson }
3709531763aSNick Mathewson
3719531763aSNick Mathewson return (r);
3729531763aSNick Mathewson }
3739531763aSNick Mathewson
3749531763aSNick Mathewson static int
epoll_nochangelist_add(struct event_base * base,evutil_socket_t fd,short old,short events,void * p)3759531763aSNick Mathewson epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
3769531763aSNick Mathewson short old, short events, void *p)
3779531763aSNick Mathewson {
3789531763aSNick Mathewson struct event_change ch;
3799531763aSNick Mathewson ch.fd = fd;
3809531763aSNick Mathewson ch.old_events = old;
381b1b69ac7SDiego Giagio ch.read_change = ch.write_change = ch.close_change = 0;
3829531763aSNick Mathewson if (events & EV_WRITE)
3839531763aSNick Mathewson ch.write_change = EV_CHANGE_ADD |
3849531763aSNick Mathewson (events & EV_ET);
3859531763aSNick Mathewson if (events & EV_READ)
3869531763aSNick Mathewson ch.read_change = EV_CHANGE_ADD |
3879531763aSNick Mathewson (events & EV_ET);
388b1b69ac7SDiego Giagio if (events & EV_CLOSED)
389b1b69ac7SDiego Giagio ch.close_change = EV_CHANGE_ADD |
390b1b69ac7SDiego Giagio (events & EV_ET);
3919531763aSNick Mathewson
3929531763aSNick Mathewson return epoll_apply_one_change(base, base->evbase, &ch);
3939531763aSNick Mathewson }
3949531763aSNick Mathewson
3959531763aSNick Mathewson static int
epoll_nochangelist_del(struct event_base * base,evutil_socket_t fd,short old,short events,void * p)3969531763aSNick Mathewson epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
3979531763aSNick Mathewson short old, short events, void *p)
3989531763aSNick Mathewson {
3999531763aSNick Mathewson struct event_change ch;
4009531763aSNick Mathewson ch.fd = fd;
4019531763aSNick Mathewson ch.old_events = old;
402b1b69ac7SDiego Giagio ch.read_change = ch.write_change = ch.close_change = 0;
4039531763aSNick Mathewson if (events & EV_WRITE)
404ca4b6404SAzat Khuzhin ch.write_change = EV_CHANGE_DEL |
405ca4b6404SAzat Khuzhin (events & EV_ET);
4069531763aSNick Mathewson if (events & EV_READ)
407ca4b6404SAzat Khuzhin ch.read_change = EV_CHANGE_DEL |
408ca4b6404SAzat Khuzhin (events & EV_ET);
409b1b69ac7SDiego Giagio if (events & EV_CLOSED)
410ca4b6404SAzat Khuzhin ch.close_change = EV_CHANGE_DEL |
411ca4b6404SAzat Khuzhin (events & EV_ET);
4129531763aSNick Mathewson
4139531763aSNick Mathewson return epoll_apply_one_change(base, base->evbase, &ch);
414c8c6a897SNick Mathewson }
415c8c6a897SNick Mathewson
416c8c6a897SNick Mathewson static int
epoll_dispatch(struct event_base * base,struct timeval * tv)41702b2b4d1SNiels Provos epoll_dispatch(struct event_base *base, struct timeval *tv)
4183e41f17aSNiels Provos {
41902b2b4d1SNiels Provos struct epollop *epollop = base->evbase;
4203e41f17aSNiels Provos struct epoll_event *events = epollop->events;
421850c3ff2SChristopher Davis int i, res;
422850c3ff2SChristopher Davis long timeout = -1;
4233e41f17aSNiels Provos
42426c75828SNick Mathewson #ifdef USING_TIMERFD
42526c75828SNick Mathewson if (epollop->timerfd >= 0) {
42626c75828SNick Mathewson struct itimerspec is;
42726c75828SNick Mathewson is.it_interval.tv_sec = 0;
42826c75828SNick Mathewson is.it_interval.tv_nsec = 0;
42926c75828SNick Mathewson if (tv == NULL) {
43026c75828SNick Mathewson /* No timeout; disarm the timer. */
43126c75828SNick Mathewson is.it_value.tv_sec = 0;
43226c75828SNick Mathewson is.it_value.tv_nsec = 0;
43326c75828SNick Mathewson } else {
43426c75828SNick Mathewson if (tv->tv_sec == 0 && tv->tv_usec == 0) {
43526c75828SNick Mathewson /* we need to exit immediately; timerfd can't
43626c75828SNick Mathewson * do that. */
43726c75828SNick Mathewson timeout = 0;
43826c75828SNick Mathewson }
43926c75828SNick Mathewson is.it_value.tv_sec = tv->tv_sec;
44026c75828SNick Mathewson is.it_value.tv_nsec = tv->tv_usec * 1000;
44126c75828SNick Mathewson }
44226c75828SNick Mathewson /* TODO: we could avoid unnecessary syscalls here by only
44326c75828SNick Mathewson calling timerfd_settime when the top timeout changes, or
44426c75828SNick Mathewson when we're called with a different timeval.
44526c75828SNick Mathewson */
44626c75828SNick Mathewson if (timerfd_settime(epollop->timerfd, 0, &is, NULL) < 0) {
44726c75828SNick Mathewson event_warn("timerfd_settime");
44826c75828SNick Mathewson }
44926c75828SNick Mathewson } else
45026c75828SNick Mathewson #endif
451850c3ff2SChristopher Davis if (tv != NULL) {
4528ac3c4c2SNick Mathewson timeout = evutil_tv_to_msec_(tv);
453850c3ff2SChristopher Davis if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
454850c3ff2SChristopher Davis /* Linux kernels can wait forever if the timeout is
455850c3ff2SChristopher Davis * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
456f9f4d4feSNick Mathewson timeout = MAX_EPOLL_TIMEOUT_MSEC;
457f9f4d4feSNick Mathewson }
458850c3ff2SChristopher Davis }
459f9f4d4feSNick Mathewson
460c8c6a897SNick Mathewson epoll_apply_changes(base);
4618ac3c4c2SNick Mathewson event_changelist_remove_all_(&base->changelist, base);
462c8c6a897SNick Mathewson
46376cd2b70SNick Mathewson EVBASE_RELEASE_LOCK(base, th_base_lock);
4646b22e74aSNick Mathewson
4653e41f17aSNiels Provos res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
4663e41f17aSNiels Provos
46776cd2b70SNick Mathewson EVBASE_ACQUIRE_LOCK(base, th_base_lock);
4686b22e74aSNick Mathewson
4693e41f17aSNiels Provos if (res == -1) {
4703e41f17aSNiels Provos if (errno != EINTR) {
471fbdaf3abSNiels Provos event_warn("epoll_wait");
4723e41f17aSNiels Provos return (-1);
4733e41f17aSNiels Provos }
4743e41f17aSNiels Provos
4753e41f17aSNiels Provos return (0);
47641b7cbc3SNiels Provos }
4773e41f17aSNiels Provos
478fbdaf3abSNiels Provos event_debug(("%s: epoll_wait reports %d", __func__, res));
4792e36dbe1SNick Mathewson EVUTIL_ASSERT(res <= epollop->nevents);
4803e41f17aSNiels Provos
4813e41f17aSNiels Provos for (i = 0; i < res; i++) {
482e1cd86d7SNiels Provos int what = events[i].events;
48385255a63SNick Mathewson short ev = 0;
48426c75828SNick Mathewson #ifdef USING_TIMERFD
48526c75828SNick Mathewson if (events[i].data.fd == epollop->timerfd)
48626c75828SNick Mathewson continue;
48726c75828SNick Mathewson #endif
488bbed0954SNiels Provos
489*1df324d4SAzat Khuzhin if (what & EPOLLERR) {
490*1df324d4SAzat Khuzhin ev = EV_READ | EV_WRITE;
491*1df324d4SAzat Khuzhin } else if ((what & EPOLLHUP) && !(what & EPOLLRDHUP)) {
49285255a63SNick Mathewson ev = EV_READ | EV_WRITE;
493bbed0954SNiels Provos } else {
49402b2b4d1SNiels Provos if (what & EPOLLIN)
49585255a63SNick Mathewson ev |= EV_READ;
49602b2b4d1SNiels Provos if (what & EPOLLOUT)
49785255a63SNick Mathewson ev |= EV_WRITE;
498b1b69ac7SDiego Giagio if (what & EPOLLRDHUP)
499b1b69ac7SDiego Giagio ev |= EV_CLOSED;
500bbed0954SNiels Provos }
5013e41f17aSNiels Provos
5020faaee01SNick Mathewson if (!ev)
503bbed0954SNiels Provos continue;
504bbed0954SNiels Provos
5058ac3c4c2SNick Mathewson evmap_io_active_(base, events[i].data.fd, ev | EV_ET);
50685255a63SNick Mathewson }
50785255a63SNick Mathewson
50885255a63SNick Mathewson if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
50985255a63SNick Mathewson /* We used all of the event space this time. We should
51085255a63SNick Mathewson be ready for more events next time. */
51185255a63SNick Mathewson int new_nevents = epollop->nevents * 2;
51285255a63SNick Mathewson struct epoll_event *new_events;
51385255a63SNick Mathewson
51485255a63SNick Mathewson new_events = mm_realloc(epollop->events,
51585255a63SNick Mathewson new_nevents * sizeof(struct epoll_event));
51685255a63SNick Mathewson if (new_events) {
51785255a63SNick Mathewson epollop->events = new_events;
51885255a63SNick Mathewson epollop->nevents = new_nevents;
51985255a63SNick Mathewson }
5203e41f17aSNiels Provos }
5213e41f17aSNiels Provos
5223e41f17aSNiels Provos return (0);
5233e41f17aSNiels Provos }
5243e41f17aSNiels Provos
5252e8051f5SNiels Provos
526ca42671aSNiels Provos static void
epoll_dealloc(struct event_base * base)52702b2b4d1SNiels Provos epoll_dealloc(struct event_base *base)
5282e8051f5SNiels Provos {
52902b2b4d1SNiels Provos struct epollop *epollop = base->evbase;
5302e8051f5SNiels Provos
5318ac3c4c2SNick Mathewson evsig_dealloc_(base);
5322e8051f5SNiels Provos if (epollop->events)
53349868b61SNick Mathewson mm_free(epollop->events);
5342e8051f5SNiels Provos if (epollop->epfd >= 0)
5352e8051f5SNiels Provos close(epollop->epfd);
53626c75828SNick Mathewson #ifdef USING_TIMERFD
53726c75828SNick Mathewson if (epollop->timerfd >= 0)
53826c75828SNick Mathewson close(epollop->timerfd);
53926c75828SNick Mathewson #endif
5402e8051f5SNiels Provos
5412e8051f5SNiels Provos memset(epollop, 0, sizeof(struct epollop));
54249868b61SNick Mathewson mm_free(epollop);
5432e8051f5SNiels Provos }
54476d4c929SRoss Lagerwall
54568120d9bSNick Mathewson #endif /* EVENT__HAVE_EPOLL */
546