xref: /redis-3.2.3/src/ae_evport.c (revision 8e2d0820)
1 /* ae.c module for illumos event ports.
2  *
3  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *   * Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *   * Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  *   * Neither the name of Redis nor the names of its contributors may be used
14  *     to endorse or promote products derived from this software without
15  *     specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  * POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 
31 #include <assert.h>
32 #include <errno.h>
33 #include <port.h>
34 #include <poll.h>
35 
36 #include <sys/types.h>
37 #include <sys/time.h>
38 
39 #include <stdio.h>
40 
/* When non-zero, the functions in this file trace their port operations and
 * fd/mask arguments to stderr.  Off by default. */
static int evport_debug = 0;
42 
43 /*
44  * This file implements the ae API using event ports, present on Solaris-based
45  * systems since Solaris 10.  Using the event port interface, we associate file
46  * descriptors with the port.  Each association also includes the set of poll(2)
47  * events that the consumer is interested in (e.g., POLLIN and POLLOUT).
48  *
49  * There's one tricky piece to this implementation: when we return events via
50  * aeApiPoll, the corresponding file descriptors become dissociated from the
51  * port.  This is necessary because poll events are level-triggered, so if the
52  * fd didn't become dissociated, it would immediately fire another event since
53  * the underlying state hasn't changed yet.  We must re-associate the file
54  * descriptor, but only after we know that our caller has actually read from it.
55  * The ae API does not tell us exactly when that happens, but we do know that
56  * it must happen by the time aeApiPoll is called again.  Our solution is to
57  * keep track of the last fds returned by aeApiPoll and re-associate them next
58  * time aeApiPoll is invoked.
59  *
60  * To summarize, in this module, each fd association is EITHER (a) represented
61  * only via the in-kernel association OR (b) represented by pending_fds and
62  * pending_masks.  (b) is only true for the last fds we returned from aeApiPoll,
63  * and only until we enter aeApiPoll again (at which point we restore the
64  * in-kernel association).
65  */
/* Largest number of events requested from one port_getn() call, and hence
 * the capacity of the pending-fd bookkeeping arrays below. */
#define MAX_EVENT_BATCHSZ 512

/*
 * Backend state hung off eventLoop->apidata.
 */
typedef struct aeApiState {
    /* Descriptor of the event port itself. */
    int portfd;
    /* Number of leading slots of the two arrays below that are in use. */
    int npending;
    /* Fds handed out by the last aeApiPoll; -1 marks a cleared slot. */
    int pending_fds[MAX_EVENT_BATCHSZ];
    /* Event mask to restore when the matching pending fd is re-associated. */
    int pending_masks[MAX_EVENT_BATCHSZ];
} aeApiState;
74 
/*
 * Set up the event port backend for an event loop: allocate the state,
 * create the port, and clear the pending-fd tables.
 *
 * Returns 0 on success, or -1 when either the allocation or port_create()
 * fails (the state is released again in the latter case).
 */
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *st = zmalloc(sizeof(aeApiState));
    int slot;

    if (st == NULL)
        return -1;

    st->portfd = port_create();
    if (st->portfd == -1) {
        zfree(st);
        return -1;
    }

    /* Nothing has been returned from aeApiPoll yet: every slot is unused. */
    st->npending = 0;
    for (slot = 0; slot != MAX_EVENT_BATCHSZ; slot++) {
        st->pending_masks[slot] = AE_NONE;
        st->pending_fds[slot] = -1;
    }

    eventLoop->apidata = st;
    return 0;
}
96 
/*
 * Resize hook of the ae backend interface.  The state kept by this backend
 * uses fixed-size arrays, so there is nothing to adjust; always returns 0.
 */
static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
    /* Both arguments are intentionally unused. */
    (void)eventLoop;
    (void)setsize;

    return 0;
}
101 
/*
 * Tear down the backend: close the event port and release the state that
 * aeApiCreate attached to the event loop.
 */
static void aeApiFree(aeEventLoop *eventLoop) {
    aeApiState *st = eventLoop->apidata;
    int port = st->portfd;

    close(port);
    zfree(st);
}
108 
/*
 * Search the first state->npending slots of pending_fds for fd.
 *
 * Returns the index of the first matching slot, or -1 if fd is not pending.
 */
static int aeApiLookupPending(aeApiState *state, int fd) {
    int slot = 0;

    while (slot < state->npending) {
        if (state->pending_fds[slot] == fd)
            return slot;
        slot++;
    }

    return -1;
}
119 
120 /*
121  * Helper function to invoke port_associate for the given fd and mask.
122  */
aeApiAssociate(const char * where,int portfd,int fd,int mask)123 static int aeApiAssociate(const char *where, int portfd, int fd, int mask) {
124     int events = 0;
125     int rv, err;
126 
127     if (mask & AE_READABLE)
128         events |= POLLIN;
129     if (mask & AE_WRITABLE)
130         events |= POLLOUT;
131 
132     if (evport_debug)
133         fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events);
134 
135     rv = port_associate(portfd, PORT_SOURCE_FD, fd, events,
136         (void *)(uintptr_t)mask);
137     err = errno;
138 
139     if (evport_debug)
140         fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err));
141 
142     if (rv == -1) {
143         fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err));
144 
145         if (err == EAGAIN)
146             fprintf(stderr, "aeApiAssociate: event port limit exceeded.");
147     }
148 
149     return rv;
150 }
151 
/*
 * Start watching fd for the events in mask, in addition to whatever the
 * event loop already records for it.
 *
 * Returns 0 on success, or the (non-zero) result of aeApiAssociate when the
 * in-kernel association could not be made.
 */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *st = eventLoop->apidata;
    int wanted, slot;

    if (evport_debug)
        fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask);

    /*
     * port_associate() replaces the event set of an existing association
     * rather than adding to it, so the request has to carry the events the
     * loop already has registered for this fd as well as the new ones.
     */
    wanted = eventLoop->events[fd].mask | mask;
    slot = aeApiLookupPending(st, fd);

    /* Not pending: the fd can be (re-)associated with the port right now. */
    if (slot == -1)
        return aeApiAssociate("aeApiAddEvent", st->portfd, fd, wanted);

    /*
     * The fd came back from the latest aeApiPoll and is waiting to be
     * re-associated.  Rather than assume the consumer is done with that
     * event, just widen the saved mask; the next aeApiPoll performs the
     * association.
     */
    if (evport_debug)
        fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd);
    st->pending_masks[slot] |= wanted;
    return 0;
}
182 
/*
 * Stop watching fd for the events in mask.
 *
 * For an fd that is pending re-association only the saved mask is edited.
 * Otherwise the in-kernel association is narrowed, or removed entirely when
 * no events remain.  Any failure to update the port aborts the process.
 */
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *st = eventLoop->apidata;
    int slot, remaining;

    if (evport_debug)
        fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask);

    slot = aeApiLookupPending(st, fd);
    if (slot != -1) {
        if (evport_debug)
            fprintf(stderr, "deleting event from pending fd %d\n", fd);

        /*
         * The fd was returned by the last aeApiPoll, so it has no
         * association with the port at the moment.  Trim the saved mask and
         * drop the slot altogether once nothing is left in it.
         */
        st->pending_masks[slot] &= ~mask;
        if (st->pending_masks[slot] == AE_NONE)
            st->pending_fds[slot] = -1;

        return;
    }

    /*
     * The fd is associated with the port.  The new association must list
     * every event still wanted; the only place to learn that is the event
     * loop's own record, which the caller is relied upon to have updated
     * before calling us.
     */
    remaining = eventLoop->events[fd].mask;

    if (remaining != AE_NONE) {
        /*
         * ENOMEM may be transient, but the kernel only reports it when things
         * are already very bad; EAGAIN means a resource limit was hit, where
         * retrying is (counter-intuitively) pointless; anything else is a
         * bug.  Aborting is the best available response to all of them.
         */
        if (aeApiAssociate("aeApiDelEvent", st->portfd, fd, remaining) != 0)
            abort(); /* will not return */

        return;
    }

    /* No events remain: remove the association.  Failure indicates a bug. */
    if (evport_debug)
        fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd);

    if (port_dissociate(st->portfd, PORT_SOURCE_FD, fd) != 0) {
        perror("aeApiDelEvent: port_dissociate");
        abort(); /* will not return */
    }
}
242 
/*
 * Wait for events on the port and report them through eventLoop->fired.
 *
 * eventLoop - loop whose apidata holds this backend's aeApiState.
 * tvp       - maximum time to wait, or NULL to block without a timeout.
 *
 * Before waiting, every fd returned by the previous call (and not deleted
 * since) is re-associated with the port using its saved mask.  The fds
 * returned by this call are then recorded as pending in turn.
 *
 * Returns the number of entries written to eventLoop->fired; 0 on timeout or
 * when interrupted by a signal.  Any other port_getn() failure, or a failed
 * re-association, aborts the process.
 */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    struct timespec timeout, *tsp;
    int mask, i;
    uint_t nevents, n;
    port_event_t event[MAX_EVENT_BATCHSZ];

    /*
     * If we've returned fd events before, we must re-associate them with the
     * port now, before calling port_getn().  See the block comment at the top
     * of this file for an explanation of why.
     */
    for (i = 0; i < state->npending; i++) {
        if (state->pending_fds[i] == -1)
            /* This fd has since been deleted. */
            continue;

        if (aeApiAssociate("aeApiPoll", state->portfd,
            state->pending_fds[i], state->pending_masks[i]) != 0) {
            /* See aeApiDelEvent for why this case is fatal. */
            abort();
        }

        state->pending_masks[i] = AE_NONE;
        state->pending_fds[i] = -1;
    }

    state->npending = 0;

    /* Convert the microsecond-resolution timeval into a timespec. */
    if (tvp != NULL) {
        timeout.tv_sec = tvp->tv_sec;
        timeout.tv_nsec = tvp->tv_usec * 1000;
        tsp = &timeout;
    } else {
        tsp = NULL;
    }

    /*
     * port_getn can return with errno == ETIME having returned some events (!).
     * So if we get ETIME, we check nevents, too.
     */
    nevents = 1;
    if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents,
        tsp) == -1 && (errno != ETIME || nevents == 0)) {
        if (errno == ETIME || errno == EINTR)
            return 0;

        /* Any other error indicates a bug. */
        perror("aeApiPoll: port_getn");
        abort();
    }

    /* nevents is bounded by MAX_EVENT_BATCHSZ, so it always fits in an int. */
    state->npending = (int)nevents;

    for (n = 0; n < nevents; n++) {
        int fd = (int)event[n].portev_object;

        mask = 0;
        if (event[n].portev_events & POLLIN)
            mask |= AE_READABLE;
        if (event[n].portev_events & POLLOUT)
            mask |= AE_WRITABLE;

        eventLoop->fired[n].fd = fd;
        eventLoop->fired[n].mask = mask;

        if (evport_debug)
            fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n", fd, mask);

        /*
         * Delivering the event dissociated the fd from the port.  Remember
         * it, with the full mask aeApiAssociate stored as the user pointer,
         * so that the next call can restore the association.
         */
        state->pending_fds[n] = fd;
        state->pending_masks[n] = (int)(uintptr_t)event[n].portev_user;
    }

    return (int)nevents;
}
317 
/*
 * Name of this ae backend.  The returned pointer refers to a string literal
 * and must not be modified or freed.
 */
static char *aeApiName(void)
{
    return "evport";
}
321