/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool reconnect;
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits after
	 * rte_vhost_driver_disable/enable_features() have been applied;
	 * these are also the final feature bits used for vhost-user
	 * feature negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	struct rte_vdpa_device *vdpa_dev;

	struct vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * Return the number of bytes read on success, or a negative value on
 * failure. Update fd_num with the number of fds received.
 */
int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		if (ret)
			VHOST_LOG_CONFIG(ERR, "recvmsg failed\n");
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		VHOST_LOG_CONFIG(ERR, "truncated msg\n");
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}
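
/*
 * A minimal usage sketch for read_fd_message(), kept as a comment so it
 * stays illustrative rather than normative; the buffer size and fd count
 * below are arbitrary, not taken from the vhost-user protocol headers:
 *
 *	char buf[256];
 *	int fds[8], fd_num;
 *
 *	int n = read_fd_message(connfd, buf, sizeof(buf), fds, 8, &fd_num);
 *	if (n <= 0)
 *		return n;	// 0: peer hung up; < 0: recvmsg() error
 *	// fds[0..fd_num-1] hold received descriptors; the rest are -1
 */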

int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			VHOST_LOG_CONFIG(ERR, "cmsg == NULL\n");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "sendmsg error\n");
		return ret;
	}

	return ret;
}
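
/*
 * The sending side mirrors the read example above. Passing fds == NULL or
 * fd_num == 0 sends a plain message with no ancillary data. A hedged
 * sketch, with a hypothetical descriptor to share:
 *
 *	int shared_fds[1] = { memfd };	// hypothetical fd, e.g. guest memory
 *
 *	if (send_fd_message(connfd, buf, msg_len, shared_fds, 1) < 0)
 *		return -1;	// errno was set by sendmsg()
 */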

static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1)
		goto err;

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
		dev = get_device(vid);

		if (dev)
			dev->async_copy = 1;
	}

	VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to add vhost user connection with fd %d\n",
				fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to add fd %d into vhost server fdset\n",
			fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* Callback invoked when there is a new vhost-user connection from a client. */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	VHOST_LOG_CONFIG(INFO, "new vhost user connection is %d\n", fd);
	vhost_user_add_connection(fd, vsocket);
}

static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	VHOST_LOG_CONFIG(INFO, "vhost-user %s: socket created, fd: %d\n",
		vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR,
			"vhost-user: can't set nonblocking mode for socket, fd: "
			"%d (%s)\n", fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists. But the library obviously should not delete a file
	 * provided by the user, since we cannot be sure it is not being
	 * used by other applications. Moreover, many applications form
	 * socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to bind to %s: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	VHOST_LOG_CONFIG(INFO, "bind to %s\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		NULL, vsocket);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"failed to add listen fd %d to vhost server fdset\n",
			fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;

static int
vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		VHOST_LOG_CONFIG(ERR,
			"can't get flags for connfd %d\n", fd);
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR,
			"can't disable nonblocking on fd %d\n", fd);
		return -2;
	}
	return 0;
}

static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equivalent implementation of TAILQ_FOREACH_SAFE,
		 * which is not available on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				VHOST_LOG_CONFIG(ERR,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			VHOST_LOG_CONFIG(INFO,
				"%s: connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}
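
/*
 * For reference, on platforms whose <sys/queue.h> provides the
 * safe-iteration macro, the hand-rolled loop above could be written as
 * (a sketch, with 'tmp' as the extra cursor variable the macro requires):
 *
 *	struct vhost_user_reconnect *tmp;
 *
 *	TAILQ_FOREACH_SAFE(reconn, &reconn_list.head, next, tmp) {
 *		... body may unlink and free 'reconn' safely ...
 *	}
 */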

static int
vhost_user_reconnect_init(void)
{
	int ret;

	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "failed to initialize mutex");
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex)) {
			VHOST_LOG_CONFIG(ERR,
				"failed to destroy reconnect mutex");
		}
	}

	return ret;
}

static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
					  sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	VHOST_LOG_CONFIG(WARNING,
		"failed to connect to %s: %s\n",
		path, strerror(errno));

	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	VHOST_LOG_CONFIG(INFO, "%s: reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_LOG_CONFIG(ERR,
			"failed to allocate memory for reconnect\n");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

int
rte_vhost_driver_attach_vdpa_device(const char *path,
		struct rte_vdpa_device *dev)
{
	struct vhost_user_socket *vsocket;

	if (dev == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = NULL;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *dev = NULL;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		dev = vsocket->vdpa_dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return dev;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * Trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
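
/*
 * A hedged sketch of how an application combines the feature calls above.
 * The socket path, 'my_features', and the VIRTIO_NET_F_MRG_RXBUF choice
 * are illustrative, not prescribed by this library:
 *
 *	// Start from the built-in net driver's default feature set,
 *	// then drop one bit from it:
 *	rte_vhost_driver_disable_features("/tmp/vhost.sock",
 *			1ULL << VIRTIO_NET_F_MRG_RXBUF);
 *
 *	// Alternatively, replace the whole set; note this also clears
 *	// use_builtin_virtio_net, marking the socket as backed by a
 *	// custom vhost-user device implementation:
 *	rte_vhost_driver_set_features("/tmp/vhost.sock", my_features);
 */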

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_set_protocol_features(const char *path,
		uint64_t protocol_features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->protocol_features = protocol_features;
	pthread_mutex_unlock(&vhost_user.mutex);
	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
				&vdpa_protocol_features) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa protocol features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_LOG_CONFIG(ERR,
				"failed to get vdpa queue number "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	free(vsocket);
}

/*
 * Register a new vhost-user socket; here we act as server (the default
 * case) or client (when the RTE_VHOST_USER_CLIENT flag is set).
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_LOG_CONFIG(ERR,
			"error: the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->vdpa_dev = NULL;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;

	if (vsocket->async_copy &&
		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
			"or post-copy feature simultaneously is not "
			"supported\n");
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about the features the builtin virtio
	 * net driver (virtio_net.c) supports, thus it's not possible for
	 * them to invoke rte_vhost_driver_set_features(). To work around
	 * this, we set the features unconditionally here. If an application
	 * wants to implement another vhost-user driver (say, SCSI), it
	 * should call rte_vhost_driver_set_features(), which will overwrite
	 * the following two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_LOG_CONFIG(INFO,
			"Logging feature is disabled in async copy mode\n");
	}

	/*
	 * A buffer from the guest cannot be received in linear mode without
	 * external buffers if it does not fit in a single mbuf, which is
	 * likely when segmentation offloading is enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_LOG_CONFIG(INFO,
			"Linear buffers requested without external buffers, "
			"disabling host segmentation offloading support\n");
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_LOG_CONFIG(ERR,
			"Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0)
		goto out_mutex;

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_LOG_CONFIG(ERR,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
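
/*
 * Typical registration call, sketched. As the comment in
 * vhost_user_start_server() explains, in server mode (the default) the
 * application must make sure no stale socket file is left at the path;
 * the path and error handling below are illustrative:
 *
 *	unlink("/tmp/vhost.sock");	// remove a stale socket file, if any
 *	if (rte_vhost_driver_register("/tmp/vhost.sock", 0) < 0)
 *		rte_exit(EXIT_FAILURE, "vhost driver register failed\n");
 */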

static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	int found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If the read/write callback is executing,
				 * release vsocket's conn_mutex and
				 * vhost_user's mutex locks, and try again
				 * since the callback may use both locks.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				VHOST_LOG_CONFIG(INFO,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				/*
				 * If the read/write callback is executing,
				 * release vhost_user's mutex lock and try
				 * again since the callback may use it.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  vsocket->socket_fd) == -1) {
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove devices to/from the data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
		struct vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}
int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/*
		 * Create a pipe that poll() waits on; it is used to notify
		 * the event loop that the poll wait list must be rebuilt.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			VHOST_LOG_CONFIG(ERR,
				"failed to create fdset handling thread");

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}
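
/*
 * End-to-end lifecycle sketch tying the public entry points in this file
 * together. 'my_ops' stands for a hypothetical application-defined
 * struct vhost_device_ops with at least new_device/destroy_device set:
 *
 *	rte_vhost_driver_register(path, 0);	// server mode by default
 *	rte_vhost_driver_callback_register(path, &my_ops);
 *	rte_vhost_driver_start(path);	// bind + listen, spawn fdset thread
 *	...
 *	rte_vhost_driver_unregister(path);
 */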