1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
3 */
4
5 #include <stdio.h>
6 #include <unistd.h>
7
8 #include <rte_common.h>
9 #include <rte_log.h>
10
11 #include "fd_man.h"
12
13
/*
 * Log type behind the VHOST_FDMAN tag used by the RTE_LOG() calls in this
 * file; it is an alias for RTE_LOGTYPE_USER1.
 */
#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1

/*
 * poll() revents bits that fdset_event_dispatch() treats as a trigger for
 * both the read and the write callback, in addition to POLLIN / POLLOUT.
 */
#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
17
18 static int
get_last_valid_idx(struct fdset * pfdset,int last_valid_idx)19 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
20 {
21 int i;
22
23 for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
24 ;
25
26 return i;
27 }
28
29 static void
fdset_move(struct fdset * pfdset,int dst,int src)30 fdset_move(struct fdset *pfdset, int dst, int src)
31 {
32 pfdset->fd[dst] = pfdset->fd[src];
33 pfdset->rwfds[dst] = pfdset->rwfds[src];
34 }
35
36 static void
fdset_shrink_nolock(struct fdset * pfdset)37 fdset_shrink_nolock(struct fdset *pfdset)
38 {
39 int i;
40 int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
41
42 for (i = 0; i < last_valid_idx; i++) {
43 if (pfdset->fd[i].fd != -1)
44 continue;
45
46 fdset_move(pfdset, i, last_valid_idx);
47 last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
48 }
49 pfdset->num = last_valid_idx + 1;
50 }
51
52 /*
53 * Find deleted fd entries and remove them
54 */
55 static void
fdset_shrink(struct fdset * pfdset)56 fdset_shrink(struct fdset *pfdset)
57 {
58 pthread_mutex_lock(&pfdset->fd_mutex);
59 fdset_shrink_nolock(pfdset);
60 pthread_mutex_unlock(&pfdset->fd_mutex);
61 }
62
63 /**
64 * Returns the index in the fdset for a given fd.
65 * @return
66 * index for the fd, or -1 if fd isn't in the fdset.
67 */
68 static int
fdset_find_fd(struct fdset * pfdset,int fd)69 fdset_find_fd(struct fdset *pfdset, int fd)
70 {
71 int i;
72
73 for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
74 ;
75
76 return i == pfdset->num ? -1 : i;
77 }
78
79 static void
fdset_add_fd(struct fdset * pfdset,int idx,int fd,fd_cb rcb,fd_cb wcb,void * dat)80 fdset_add_fd(struct fdset *pfdset, int idx, int fd,
81 fd_cb rcb, fd_cb wcb, void *dat)
82 {
83 struct fdentry *pfdentry = &pfdset->fd[idx];
84 struct pollfd *pfd = &pfdset->rwfds[idx];
85
86 pfdentry->fd = fd;
87 pfdentry->rcb = rcb;
88 pfdentry->wcb = wcb;
89 pfdentry->dat = dat;
90
91 pfd->fd = fd;
92 pfd->events = rcb ? POLLIN : 0;
93 pfd->events |= wcb ? POLLOUT : 0;
94 pfd->revents = 0;
95 }
96
97 void
fdset_init(struct fdset * pfdset)98 fdset_init(struct fdset *pfdset)
99 {
100 int i;
101
102 if (pfdset == NULL)
103 return;
104
105 for (i = 0; i < MAX_FDS; i++) {
106 pfdset->fd[i].fd = -1;
107 pfdset->fd[i].dat = NULL;
108 }
109 pfdset->num = 0;
110 }
111
112 /**
113 * Register the fd in the fdset with read/write handler and context.
114 */
115 int
fdset_add(struct fdset * pfdset,int fd,fd_cb rcb,fd_cb wcb,void * dat)116 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
117 {
118 int i;
119
120 if (pfdset == NULL || fd == -1)
121 return -1;
122
123 pthread_mutex_lock(&pfdset->fd_mutex);
124 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
125 if (i == -1) {
126 pthread_mutex_lock(&pfdset->fd_pooling_mutex);
127 fdset_shrink_nolock(pfdset);
128 pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
129 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
130 if (i == -1) {
131 pthread_mutex_unlock(&pfdset->fd_mutex);
132 return -2;
133 }
134 }
135
136 fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
137 pthread_mutex_unlock(&pfdset->fd_mutex);
138
139 return 0;
140 }
141
142 /**
143 * Unregister the fd from the fdset.
144 * Returns context of a given fd or NULL.
145 */
146 void *
fdset_del(struct fdset * pfdset,int fd)147 fdset_del(struct fdset *pfdset, int fd)
148 {
149 int i;
150 void *dat = NULL;
151
152 if (pfdset == NULL || fd == -1)
153 return NULL;
154
155 do {
156 pthread_mutex_lock(&pfdset->fd_mutex);
157
158 i = fdset_find_fd(pfdset, fd);
159 if (i != -1 && pfdset->fd[i].busy == 0) {
160 /* busy indicates r/wcb is executing! */
161 dat = pfdset->fd[i].dat;
162 pfdset->fd[i].fd = -1;
163 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
164 pfdset->fd[i].dat = NULL;
165 i = -1;
166 }
167 pthread_mutex_unlock(&pfdset->fd_mutex);
168 } while (i != -1);
169
170 return dat;
171 }
172
173 /**
174 * Unregister the fd from the fdset.
175 *
176 * If parameters are invalid, return directly -2.
177 * And check whether fd is busy, if yes, return -1.
178 * Otherwise, try to delete the fd from fdset and
179 * return true.
180 */
181 int
fdset_try_del(struct fdset * pfdset,int fd)182 fdset_try_del(struct fdset *pfdset, int fd)
183 {
184 int i;
185
186 if (pfdset == NULL || fd == -1)
187 return -2;
188
189 pthread_mutex_lock(&pfdset->fd_mutex);
190 i = fdset_find_fd(pfdset, fd);
191 if (i != -1 && pfdset->fd[i].busy) {
192 pthread_mutex_unlock(&pfdset->fd_mutex);
193 return -1;
194 }
195
196 if (i != -1) {
197 pfdset->fd[i].fd = -1;
198 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
199 pfdset->fd[i].dat = NULL;
200 }
201
202 pthread_mutex_unlock(&pfdset->fd_mutex);
203 return 0;
204 }
205
206 /**
207 * This functions runs in infinite blocking loop until there is no fd in
208 * pfdset. It calls corresponding r/w handler if there is event on the fd.
209 *
210 * Before the callback is called, we set the flag to busy status; If other
211 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
212 * will wait until the flag is reset to zero(which indicates the callback is
213 * finished), then it could free the context after fdset_del.
214 */
215 void *
fdset_event_dispatch(void * arg)216 fdset_event_dispatch(void *arg)
217 {
218 int i;
219 struct pollfd *pfd;
220 struct fdentry *pfdentry;
221 fd_cb rcb, wcb;
222 void *dat;
223 int fd, numfds;
224 int remove1, remove2;
225 int need_shrink;
226 struct fdset *pfdset = arg;
227 int val;
228
229 if (pfdset == NULL)
230 return NULL;
231
232 while (1) {
233
234 /*
235 * When poll is blocked, other threads might unregister
236 * listenfds from and register new listenfds into fdset.
237 * When poll returns, the entries for listenfds in the fdset
238 * might have been updated. It is ok if there is unwanted call
239 * for new listenfds.
240 */
241 pthread_mutex_lock(&pfdset->fd_mutex);
242 numfds = pfdset->num;
243 pthread_mutex_unlock(&pfdset->fd_mutex);
244
245 pthread_mutex_lock(&pfdset->fd_pooling_mutex);
246 val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
247 pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
248 if (val < 0)
249 continue;
250
251 need_shrink = 0;
252 for (i = 0; i < numfds; i++) {
253 pthread_mutex_lock(&pfdset->fd_mutex);
254
255 pfdentry = &pfdset->fd[i];
256 fd = pfdentry->fd;
257 pfd = &pfdset->rwfds[i];
258
259 if (fd < 0) {
260 need_shrink = 1;
261 pthread_mutex_unlock(&pfdset->fd_mutex);
262 continue;
263 }
264
265 if (!pfd->revents) {
266 pthread_mutex_unlock(&pfdset->fd_mutex);
267 continue;
268 }
269
270 remove1 = remove2 = 0;
271
272 rcb = pfdentry->rcb;
273 wcb = pfdentry->wcb;
274 dat = pfdentry->dat;
275 pfdentry->busy = 1;
276
277 pthread_mutex_unlock(&pfdset->fd_mutex);
278
279 if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
280 rcb(fd, dat, &remove1);
281 if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
282 wcb(fd, dat, &remove2);
283 pfdentry->busy = 0;
284 /*
285 * fdset_del needs to check busy flag.
286 * We don't allow fdset_del to be called in callback
287 * directly.
288 */
289 /*
290 * When we are to clean up the fd from fdset,
291 * because the fd is closed in the cb,
292 * the old fd val could be reused by when creates new
293 * listen fd in another thread, we couldn't call
294 * fdset_del.
295 */
296 if (remove1 || remove2) {
297 pfdentry->fd = -1;
298 need_shrink = 1;
299 }
300 }
301
302 if (need_shrink)
303 fdset_shrink(pfdset);
304 }
305
306 return NULL;
307 }
308
309 static void
fdset_pipe_read_cb(int readfd,void * dat __rte_unused,int * remove __rte_unused)310 fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
311 int *remove __rte_unused)
312 {
313 char charbuf[16];
314 int r = read(readfd, charbuf, sizeof(charbuf));
315 /*
316 * Just an optimization, we don't care if read() failed
317 * so ignore explicitly its return value to make the
318 * compiler happy
319 */
320 RTE_SET_USED(r);
321 }
322
323 void
fdset_pipe_uninit(struct fdset * fdset)324 fdset_pipe_uninit(struct fdset *fdset)
325 {
326 fdset_del(fdset, fdset->u.readfd);
327 close(fdset->u.readfd);
328 close(fdset->u.writefd);
329 }
330
331 int
fdset_pipe_init(struct fdset * fdset)332 fdset_pipe_init(struct fdset *fdset)
333 {
334 int ret;
335
336 if (pipe(fdset->u.pipefd) < 0) {
337 RTE_LOG(ERR, VHOST_FDMAN,
338 "failed to create pipe for vhost fdset\n");
339 return -1;
340 }
341
342 ret = fdset_add(fdset, fdset->u.readfd,
343 fdset_pipe_read_cb, NULL, NULL);
344
345 if (ret < 0) {
346 RTE_LOG(ERR, VHOST_FDMAN,
347 "failed to add pipe readfd %d into vhost server fdset\n",
348 fdset->u.readfd);
349
350 fdset_pipe_uninit(fdset);
351 return -1;
352 }
353
354 return 0;
355 }
356
357 void
fdset_pipe_notify(struct fdset * fdset)358 fdset_pipe_notify(struct fdset *fdset)
359 {
360 int r = write(fdset->u.writefd, "1", 1);
361 /*
362 * Just an optimization, we don't care if write() failed
363 * so ignore explicitly its return value to make the
364 * compiler happy
365 */
366 RTE_SET_USED(r);
367
368 }
369