xref: /libevent-2.1.12/kqueue.c (revision 5e4bafbb)
1 /*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
2 
3 /*
4  * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
5  * Copyright 2007-2010 Niels Provos and Nick Mathewson
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 #include "event2/event-config.h"
30 
31 #define _GNU_SOURCE
32 
33 #include <sys/types.h>
34 #ifdef _EVENT_HAVE_SYS_TIME_H
35 #include <sys/time.h>
36 #endif
37 #include <sys/queue.h>
38 #include <sys/event.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <errno.h>
45 #ifdef _EVENT_HAVE_INTTYPES_H
46 #include <inttypes.h>
47 #endif
48 
/* Some platforms apparently define the udata field of struct kevent as
 * intptr_t, whereas others define it as void*.  There doesn't seem to be an
 * easy way to tell them apart via autoconf, so we need to use OS macros.
 *
 * PTR_TO_UDATA(x) expands to a cast to intptr_t when <inttypes.h> is
 * available and the target is not OpenBSD, FreeBSD or Darwin/Apple; on every
 * other configuration it expands to x unchanged. */
#if defined(_EVENT_HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
#define PTR_TO_UDATA(x)	((intptr_t)(x))
#else
#define PTR_TO_UDATA(x)	(x)
#endif
57 
58 #include "event-internal.h"
59 #include "log-internal.h"
60 #include "evmap-internal.h"
61 #include "event2/thread.h"
62 #include "evthread-internal.h"
63 #include "changelist-internal.h"
64 
/* Number of entries kq_init() allocates for each of the changes and events
 * arrays; both arrays are doubled later on when they fill up. */
#define NEVENT		64
66 
/* Per-event_base state of the kqueue backend (stored in base->evbase). */
struct kqop {
	/* Array that kq_dispatch() fills from the base's changelist and
	 * passes to kevent() as the list of changes to apply. */
	struct kevent *changes;
	/* Number of entries allocated in changes. */
	int changes_size;

	/* Array that kevent() writes its results into. */
	struct kevent *events;
	/* Number of entries allocated in events. */
	int events_size;
	/* Descriptor returned by kqueue(). */
	int kq;
	/* Value of getpid() when kq was created; kqop_free() closes kq only
	 * when it runs in that same process. */
	pid_t pid;
};
76 
/* Releases a kqop together with its arrays and kqueue descriptor. */
static void kqop_free(struct kqop *kqop);

/* Backend callbacks, forward-declared so that the kqops and kqsigops tables
 * can name them before their definitions appear. */
static void *kq_init(struct event_base *);
static int kq_sig_add(struct event_base *, int, short, short, void *);
static int kq_sig_del(struct event_base *, int, short, short, void *);
static int kq_dispatch(struct event_base *, struct timeval *);
static void kq_dealloc(struct event_base *);
84 
/* Backend table for kqueue.  Adds and deletes go through the generic
 * changelist helpers; the accumulated changes are handed to the kernel by
 * kq_dispatch().
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * is fixed by struct eventop, which is declared outside this file. */
const struct eventop kqops = {
	"kqueue",
	kq_init,
	event_changelist_add,
	event_changelist_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */,
    EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS,
	EVENT_CHANGELIST_FDINFO_SIZE
};
96 
/* Signal-handling table that kq_init() installs as base->evsigsel.  Only the
 * add and del slots are populated (kq_sig_add / kq_sig_del); the other
 * function slots are NULL and the trailing numeric slots are 0. */
static const struct eventop kqsigops = {
	"kqueue_signal",
	NULL,
	kq_sig_add,
	kq_sig_del,
	NULL,
	NULL,
	1 /* need reinit */,
	0,
	0
};
108 
109 static void *
110 kq_init(struct event_base *base)
111 {
112 	int kq = -1;
113 	struct kqop *kqueueop = NULL;
114 
115 	if (!(kqueueop = mm_calloc(1, sizeof(struct kqop))))
116 		return (NULL);
117 
118 /* Initialize the kernel queue */
119 
120 	if ((kq = kqueue()) == -1) {
121 		event_warn("kqueue");
122 		goto err;
123 	}
124 
125 	kqueueop->kq = kq;
126 
127 	kqueueop->pid = getpid();
128 
129 	/* Initialize fields */
130 	kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent));
131 	if (kqueueop->changes == NULL)
132 		goto err;
133 	kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent));
134 	if (kqueueop->events == NULL)
135 		goto err;
136 	kqueueop->events_size = kqueueop->changes_size = NEVENT;
137 
138 	/* Check for Mac OS X kqueue bug. */
139 	memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]);
140 	kqueueop->changes[0].ident = -1;
141 	kqueueop->changes[0].filter = EVFILT_READ;
142 	kqueueop->changes[0].flags = EV_ADD;
143 	/*
144 	 * If kqueue works, then kevent will succeed, and it will
145 	 * stick an error in events[0].  If kqueue is broken, then
146 	 * kevent will fail.
147 	 */
148 	if (kevent(kq,
149 		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
150 	    (int)kqueueop->events[0].ident != -1 ||
151 	    kqueueop->events[0].flags != EV_ERROR) {
152 		event_warn("%s: detected broken kqueue; not using.", __func__);
153 		goto err;
154 	}
155 
156 	base->evsigsel = &kqsigops;
157 
158 	return (kqueueop);
159 err:
160 	if (kqueueop)
161 		kqop_free(kqueueop);
162 
163 	return (NULL);
164 }
165 
/*
 * Signal handler that kq_sig_add() installs for signals watched through
 * the kqueue.  It deliberately does nothing.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;	/* unused */
}
171 
172 static void
173 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change)
174 {
175 	memset(out, 0, sizeof(out));
176 	out->ident = fd;
177 	out->filter = filter;
178 
179 	if (change & EV_CHANGE_ADD) {
180 		out->flags = EV_ADD;
181 		if (change & EV_ET)
182 			out->flags |= EV_CLEAR;
183 #ifdef NOTE_EOF
184 		/* Make it behave like select() and poll() */
185 		if (filter == EVFILT_READ)
186 			out->fflags = NOTE_EOF;
187 #endif
188 	} else {
189 		EVUTIL_ASSERT(change & EV_CHANGE_DEL);
190 		out->flags = EV_DELETE;
191 	}
192 }
193 
194 static int
195 kq_build_changes_list(const struct event_changelist *changelist,
196     struct kqop *kqop)
197 {
198 	int i;
199 	int n_changes = 0;
200 
201 	for (i = 0; i < changelist->n_changes; ++i) {
202 		struct event_change *in_ch = &changelist->changes[i];
203 		struct kevent *out_ch;
204 		if (n_changes >= kqop->changes_size - 1) {
205 			int newsize = kqop->changes_size * 2;
206 			struct kevent *newchanges;
207 
208 			newchanges = mm_realloc(kqop->changes,
209 			    newsize * sizeof(struct kevent));
210 			if (newchanges == NULL) {
211 				event_warn("%s: realloc", __func__);
212 				return (-1);
213 			}
214 			kqop->changes = newchanges;
215 			kqop->changes_size = newsize;
216 		}
217 		if (in_ch->read_change) {
218 			out_ch = &kqop->changes[n_changes++];
219 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ,
220 			    in_ch->read_change);
221 		}
222 		if (in_ch->write_change) {
223 			out_ch = &kqop->changes[n_changes++];
224 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE,
225 			    in_ch->write_change);
226 		}
227 	}
228 	return n_changes;
229 }
230 
231 static int
232 kq_dispatch(struct event_base *base, struct timeval *tv)
233 {
234 	struct kqop *kqop = base->evbase;
235 	struct kevent *events = kqop->events;
236 	struct kevent *changes;
237 	struct timespec ts, *ts_p = NULL;
238 	int i, n_changes, res;
239 
240 	if (tv != NULL) {
241 		TIMEVAL_TO_TIMESPEC(tv, &ts);
242 		ts_p = &ts;
243 	}
244 
245 	/* Build "changes" from "base->changes" */
246 	EVUTIL_ASSERT(kqop->changes);
247 	n_changes = kq_build_changes_list(&base->changelist, kqop);
248 	if (n_changes < 0)
249 		return -1;
250 
251 	event_changelist_remove_all(&base->changelist, base);
252 
253 	/* steal the changes array in case some broken code tries to call
254 	 * dispatch twice at once. */
255 	changes = kqop->changes;
256 	kqop->changes = NULL;
257 
258 	EVBASE_RELEASE_LOCK(base, th_base_lock);
259 
260 	res = kevent(kqop->kq, changes, n_changes,
261 	    events, kqop->events_size, ts_p);
262 
263 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
264 
265 	EVUTIL_ASSERT(kqop->changes == NULL);
266 	kqop->changes = changes;
267 
268 	if (res == -1) {
269 		if (errno != EINTR) {
270 			event_warn("kevent");
271 			return (-1);
272 		}
273 
274 		return (0);
275 	}
276 
277 	event_debug(("%s: kevent reports %d", __func__, res));
278 
279 	for (i = 0; i < res; i++) {
280 		int which = 0;
281 
282 		if (events[i].flags & EV_ERROR) {
283 			/*
284 			 * Error messages that can happen, when a delete fails.
285 			 *   EBADF happens when the file descriptor has been
286 			 *   closed,
287 			 *   ENOENT when the file descriptor was closed and
288 			 *   then reopened.
289 			 *   EINVAL for some reasons not understood; EINVAL
290 			 *   should not be returned ever; but FreeBSD does :-\
291 			 * An error is also indicated when a callback deletes
292 			 * an event we are still processing.  In that case
293 			 * the data field is set to ENOENT.
294 			 */
295 			if (events[i].data == EBADF ||
296 			    events[i].data == EINVAL ||
297 			    events[i].data == ENOENT)
298 				continue;
299 			errno = events[i].data;
300 			return (-1);
301 		}
302 
303 		if (events[i].filter == EVFILT_READ) {
304 			which |= EV_READ;
305 		} else if (events[i].filter == EVFILT_WRITE) {
306 			which |= EV_WRITE;
307 		} else if (events[i].filter == EVFILT_SIGNAL) {
308 			which |= EV_SIGNAL;
309 		}
310 
311 		if (!which)
312 			continue;
313 
314 		if (events[i].filter == EVFILT_SIGNAL) {
315 			evmap_signal_active(base, events[i].ident, 1);
316 		} else {
317 			evmap_io_active(base, events[i].ident, which | EV_ET);
318 		}
319 	}
320 
321 	if (res == kqop->events_size) {
322 		struct kevent *newresult;
323 		int size = kqop->events_size;
324 		/* We used all the events space that we have. Maybe we should
325 		   make it bigger. */
326 		size *= 2;
327 		newresult = mm_realloc(kqop->events,
328 		    size * sizeof(struct kevent));
329 		if (newresult) {
330 			kqop->events = newresult;
331 			kqop->events_size = size;
332 		}
333 	}
334 
335 	return (0);
336 }
337 
338 static void
339 kqop_free(struct kqop *kqop)
340 {
341 	if (kqop->changes)
342 		mm_free(kqop->changes);
343 	if (kqop->events)
344 		mm_free(kqop->events);
345 	if (kqop->kq >= 0 && kqop->pid == getpid())
346 		close(kqop->kq);
347 	memset(kqop, 0, sizeof(struct kqop));
348 	mm_free(kqop);
349 }
350 
351 static void
352 kq_dealloc(struct event_base *base)
353 {
354 	struct kqop *kqop = base->evbase;
355 	evsig_dealloc(base);
356 	kqop_free(kqop);
357 }
358 
359 /* signal handling */
360 static int
361 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p)
362 {
363 	struct kqop *kqop = base->evbase;
364 	struct kevent kev;
365 	struct timespec timeout = { 0, 0 };
366 	(void)p;
367 
368 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
369 
370 	memset(&kev, 0, sizeof(kev));
371 	kev.ident = nsignal;
372 	kev.filter = EVFILT_SIGNAL;
373 	kev.flags = EV_ADD;
374 
375 	/* Be ready for the signal if it is sent any
376 	 * time between now and the next call to
377 	 * kq_dispatch. */
378 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
379 		return (-1);
380 
381 	/* XXXX The manpage suggest we could use SIG_IGN instead of a
382 	 * do-nothing handler */
383 	if (_evsig_set_handler(base, nsignal, kq_sighandler) == -1)
384 		return (-1);
385 
386 	return (0);
387 }
388 
389 static int
390 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p)
391 {
392 	struct kqop *kqop = base->evbase;
393 	struct kevent kev;
394 
395 	struct timespec timeout = { 0, 0 };
396 	(void)p;
397 
398 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
399 
400 	memset(&kev, 0, sizeof(kev));
401 	kev.ident = nsignal;
402 	kev.filter = EVFILT_SIGNAL;
403 	kev.flags = EV_DELETE;
404 
405 	/* Because we insert signal events
406 	 * immediately, we need to delete them
407 	 * immediately, too */
408 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
409 		return (-1);
410 
411 	if (_evsig_restore_handler(base, nsignal) == -1)
412 		return (-1);
413 
414 	return (0);
415 }
416