1 /* 2 * Copyright 2000-2009 Niels Provos <[email protected]> 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. The name of the author may not be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #ifdef HAVE_CONFIG_H 27 #include "event-config.h" 28 #endif 29 30 #include <sys/types.h> 31 #include <sys/resource.h> 32 #ifdef _EVENT_HAVE_SYS_TIME_H 33 #include <sys/time.h> 34 #else 35 #include <sys/_time.h> 36 #endif 37 #include <sys/queue.h> 38 #include <sys/devpoll.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <fcntl.h> 45 #include <errno.h> 46 47 #include "event2/event.h" 48 #include "event2/event_struct.h" 49 #include "event-internal.h" 50 #include "evsignal-internal.h" 51 #include "log-internal.h" 52 #include "evmap-internal.h" 53 54 struct devpollop { 55 struct pollfd *events; 56 int nevents; 57 int dpfd; 58 struct pollfd *changes; 59 int nchanges; 60 }; 61 62 static void *devpoll_init (struct event_base *); 63 static int devpoll_add(struct event_base *, int fd, short old, short events, void *); 64 static int devpoll_del(struct event_base *, int fd, short old, short events, void *); 65 static int devpoll_dispatch (struct event_base *, struct timeval *); 66 static void devpoll_dealloc (struct event_base *); 67 68 const struct eventop devpollops = { 69 "devpoll", 70 devpoll_init, 71 devpoll_add, 72 devpoll_del, 73 devpoll_dispatch, 74 devpoll_dealloc, 75 1, /* need reinit */ 76 EV_FEATURE_FDS|EV_FEATURE_O1, 77 0 78 }; 79 80 #define NEVENT 32000 81 82 static int 83 devpoll_commit(struct devpollop *devpollop) 84 { 85 /* 86 * Due to a bug in Solaris, we have to use pwrite with an offset of 0. 87 * Write is limited to 2GB of data, until it will fail. 88 */ 89 if (pwrite(devpollop->dpfd, devpollop->changes, 90 sizeof(struct pollfd) * devpollop->nchanges, 0) == -1) 91 return(-1); 92 93 devpollop->nchanges = 0; 94 return(0); 95 } 96 97 static int 98 devpoll_queue(struct devpollop *devpollop, int fd, int events) { 99 struct pollfd *pfd; 100 101 if (devpollop->nchanges >= devpollop->nevents) { 102 /* 103 * Change buffer is full, must commit it to /dev/poll before 104 * adding more 105 */ 106 if (devpoll_commit(devpollop) != 0) 107 return(-1); 108 } 109 110 pfd = &devpollop->changes[devpollop->nchanges++]; 111 pfd->fd = fd; 112 pfd->events = events; 113 pfd->revents = 0; 114 115 return(0); 116 } 117 118 static void * 119 devpoll_init(struct event_base *base) 120 { 121 int dpfd, nfiles = NEVENT; 122 struct rlimit rl; 123 struct devpollop *devpollop; 124 125 if (!(devpollop = mm_calloc(1, sizeof(struct devpollop)))) 126 return (NULL); 127 128 if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && 129 rl.rlim_cur != RLIM_INFINITY) 130 nfiles = rl.rlim_cur; 131 132 /* Initialize the kernel queue */ 133 if ((dpfd = open("/dev/poll", O_RDWR)) == -1) { 134 event_warn("open: /dev/poll"); 135 mm_free(devpollop); 136 return (NULL); 137 } 138 139 devpollop->dpfd = dpfd; 140 141 /* Initialize fields */ 142 /* FIXME: allocating 'nfiles' worth of space here can be 143 * expensive and unnecessary. See how epoll.c does it instead. */ 144 devpollop->events = mm_calloc(nfiles, sizeof(struct pollfd)); 145 if (devpollop->events == NULL) { 146 mm_free(devpollop); 147 close(dpfd); 148 return (NULL); 149 } 150 devpollop->nevents = nfiles; 151 152 devpollop->changes = mm_calloc(nfiles, sizeof(struct pollfd)); 153 if (devpollop->changes == NULL) { 154 mm_free(devpollop->events); 155 mm_free(devpollop); 156 close(dpfd); 157 return (NULL); 158 } 159 160 evsig_init(base); 161 162 return (devpollop); 163 } 164 165 static int 166 devpoll_dispatch(struct event_base *base, struct timeval *tv) 167 { 168 struct devpollop *devpollop = base->evbase; 169 struct pollfd *events = devpollop->events; 170 struct dvpoll dvp; 171 int i, res, timeout = -1; 172 173 if (devpollop->nchanges) 174 devpoll_commit(devpollop); 175 176 if (tv != NULL) 177 timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; 178 179 dvp.dp_fds = devpollop->events; 180 dvp.dp_nfds = devpollop->nevents; 181 dvp.dp_timeout = timeout; 182 183 EVBASE_RELEASE_LOCK(base, EVTHREAD_WRITE, th_base_lock); 184 185 res = ioctl(devpollop->dpfd, DP_POLL, &dvp); 186 187 EVBASE_ACQUIRE_LOCK(base, EVTHREAD_WRITE, th_base_lock); 188 189 if (res == -1) { 190 if (errno != EINTR) { 191 event_warn("ioctl: DP_POLL"); 192 return (-1); 193 } 194 195 evsig_process(base); 196 return (0); 197 } else if (base->sig.evsig_caught) { 198 evsig_process(base); 199 } 200 201 event_debug(("%s: devpoll_wait reports %d", __func__, res)); 202 203 for (i = 0; i < res; i++) { 204 int which = 0; 205 int what = events[i].revents; 206 207 if (what & POLLHUP) 208 what |= POLLIN | POLLOUT; 209 else if (what & POLLERR) 210 what |= POLLIN | POLLOUT; 211 212 if (what & POLLIN) 213 which |= EV_READ; 214 if (what & POLLOUT) 215 which |= EV_WRITE; 216 217 if (!which) 218 continue; 219 220 /* XXX(niels): not sure if this works for devpoll */ 221 evmap_io_active(base, events[i].fd, which); 222 } 223 224 return (0); 225 } 226 227 228 static int 229 devpoll_add(struct event_base *base, int fd, short old, short events, void *p) 230 { 231 struct devpollop *devpollop = base->evbase; 232 int res; 233 (void)p; 234 235 /* 236 * It's not necessary to OR the existing read/write events that we 237 * are currently interested in with the new event we are adding. 238 * The /dev/poll driver ORs any new events with the existing events 239 * that it has cached for the fd. 240 */ 241 242 res = 0; 243 if (events & EV_READ) 244 res |= POLLIN; 245 if (events & EV_WRITE) 246 res |= POLLOUT; 247 248 if (devpoll_queue(devpollop, fd, res) != 0) 249 return(-1); 250 251 return (0); 252 } 253 254 static int 255 devpoll_del(struct event_base *base, int fd, short old, short events, void *p) 256 { 257 struct devpollop *devpollop = base->evbase; 258 int res; 259 (void)p; 260 261 res = 0; 262 if (events & EV_READ) 263 res |= POLLIN; 264 if (events & EV_WRITE) 265 res |= POLLOUT; 266 267 /* 268 * The only way to remove an fd from the /dev/poll monitored set is 269 * to use POLLREMOVE by itself. This removes ALL events for the fd 270 * provided so if we care about two events and are only removing one 271 * we must re-add the other event after POLLREMOVE. 272 */ 273 274 if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0) 275 return(-1); 276 277 if ((res & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) { 278 /* 279 * We're not deleting all events, so we must resubmit the 280 * event that we are still interested in if one exists. 281 */ 282 283 if ((res & POLLIN) && (old & EV_WRITE)) { 284 /* Deleting read, still care about write */ 285 devpoll_queue(devpollop, fd, POLLOUT); 286 } else if ((res & POLLOUT) && (old & EV_READ)) { 287 /* Deleting write, still care about read */ 288 devpoll_queue(devpollop, fd, POLLIN); 289 } 290 } 291 292 return (0); 293 } 294 295 static void 296 devpoll_dealloc(struct event_base *base) 297 { 298 struct devpollop *devpollop = base->evbase; 299 300 evsig_dealloc(base); 301 if (devpollop->events) 302 mm_free(devpollop->events); 303 if (devpollop->changes) 304 mm_free(devpollop->changes); 305 if (devpollop->dpfd >= 0) 306 close(devpollop->dpfd); 307 308 memset(devpollop, 0, sizeof(struct devpollop)); 309 mm_free(devpollop); 310 } 311