/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool reconnect;
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the bits remaining after
	 * rte_vhost_driver_disable_features()/rte_vhost_driver_enable_features()
	 * have been applied; it is also the final feature set used for
	 * vhost-user feature negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	struct rte_vdpa_device *vdpa_dev;

	struct rte_vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
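/*
 * Singleton holding every registered vhost-user socket and the shared
 * fdset used to poll all listen and connection fds.
 */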
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * Return the number of bytes read on success, or a negative value on
 * failure. On success, *fd_num is updated with the number of fds received.
 */
int
read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
	int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		if (ret)
			VHOST_LOG_CONFIG(ERR, "(%s) recvmsg failed on fd %d (%s)\n",
					ifname, sockfd, strerror(errno));
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		VHOST_LOG_CONFIG(ERR, "(%s) truncated msg (fd %d)\n", ifname, sockfd);
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}
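
/*
 * Illustrative sketch (not part of the library): a hypothetical caller
 * draining one message that may carry ancillary fds, e.g. the memory
 * region fds a frontend sends. "connfd" and the buffer sizes are
 * assumptions.
 *
 *	char buf[256];
 *	int fds[8], fd_num;
 *	int n = read_fd_message("vhost0", connfd, buf, sizeof(buf),
 *			fds, 8, &fd_num);
 *	if (n > 0) {
 *		// buf holds n payload bytes; fds[0..fd_num-1] are valid,
 *		// the remaining slots were cleared to -1.
 *	}
 */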

int
send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			VHOST_LOG_CONFIG(ERR, "(%s) cmsg == NULL\n", ifname);
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) sendmsg error on fd %d (%s)\n",
				ifname, sockfd, strerror(errno));
		return ret;
	}

	return ret;
}
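
/*
 * Illustrative sketch (hypothetical caller): send_fd_message() passing
 * one file descriptor alongside a small payload. "connfd", "efd" and
 * "reply" are assumptions; error handling is abbreviated.
 *
 *	int efd = eventfd(0, 0);	// needs <sys/eventfd.h>
 *	char reply[] = "ok";
 *	if (send_fd_message("vhost0", connfd, reply, sizeof(reply),
 *			&efd, 1) < 0) {
 *		// the fd was not transferred; handle the error
 *	}
 */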

static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;
	struct virtio_net *dev;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1) {
		goto err;
	}

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
		vsocket->net_compliant_ol_flags);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);

	if (vsocket->extbuf)
		vhost_enable_extbuf(vid);

	if (vsocket->linearbuf)
		vhost_enable_linearbuf(vid);

	if (vsocket->async_copy) {
		dev = get_device(vid);

		if (dev)
			dev->async_copy = 1;
	}

	VHOST_LOG_CONFIG(INFO, "(%s) new device, handle is %d\n", vsocket->path, vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			VHOST_LOG_CONFIG(ERR,
				"(%s) failed to add vhost user connection with fd %d\n",
				vsocket->path, fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to add fd %d into vhost server fdset\n",
			vsocket->path, fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* Callback invoked when a new vhost-user connection arrives from a client. */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	VHOST_LOG_CONFIG(INFO, "(%s) new vhost user connection is %d\n",
			vsocket->path, fd);
	vhost_user_add_connection(fd, vsocket);
}

static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	VHOST_LOG_CONFIG(INFO, "(%s) vhost-user %s: socket created, fd: %d\n",
		vsocket->path, vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR,
			"(%s) vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
			vsocket->path, fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists. But the library obviously should not delete a file
	 * provided by the user, since we cannot be sure that it is not
	 * being used by other applications. Moreover, many applications
	 * form socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to bind: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	VHOST_LOG_CONFIG(INFO, "(%s) binding succeeded\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		NULL, vsocket);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR,
			"(%s) failed to add listen fd %d to vhost server fdset\n",
			path, fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;

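/*
 * Try to connect and switch the socket back to blocking mode on success.
 * Returns 0 on success, -1 if the connect attempt failed and may be
 * retried later, and -2 on an unrecoverable error (the caller is
 * expected to close the fd).
 */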
static int
vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) can't get flags for connfd %d (%s)\n",
			path, fd, strerror(errno));
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		VHOST_LOG_CONFIG(ERR, "(%s) can't disable nonblocking on fd %d\n", path, fd);
		return -2;
	}
	return 0;
}

static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * Equivalent to TAILQ_FOREACH_SAFE(), which is not
		 * available on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				VHOST_LOG_CONFIG(ERR, "(%s) reconnection for fd %d failed\n",
					reconn->vsocket->path, reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			VHOST_LOG_CONFIG(INFO, "(%s) connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}

static int
vhost_user_reconnect_init(void)
{
	int ret;

	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	if (ret < 0) {
		VHOST_LOG_CONFIG(ERR, "%s: failed to initialize mutex", __func__);
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex))
			VHOST_LOG_CONFIG(ERR, "%s: failed to destroy reconnect mutex", __func__);
	}

	return ret;
}

static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
		sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	VHOST_LOG_CONFIG(WARNING, "(%s) failed to connect: %s\n", path, strerror(errno));

	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	VHOST_LOG_CONFIG(INFO, "(%s) reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for reconnect\n", path);
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

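/* Caller must hold vhost_user.mutex. */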
static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

int
rte_vhost_driver_attach_vdpa_device(const char *path,
		struct rte_vdpa_device *dev)
{
	struct vhost_user_socket *vsocket;

	if (dev == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev = NULL;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;
	struct rte_vdpa_device *dev = NULL;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		dev = vsocket->vdpa_dev;
	pthread_mutex_unlock(&vhost_user.mutex);

	return dev;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * Trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa features for socket file.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}
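
/*
 * Illustrative sketch (hypothetical caller): querying the negotiable
 * feature set after registration. The socket path is an assumption.
 *
 *	uint64_t features;
 *	if (rte_vhost_driver_get_features("/tmp/vhost.sock", &features) != 0)
 *		rte_exit(EXIT_FAILURE, "socket not registered\n");
 *	// "features" is the driver's set, further masked by the attached
 *	// vDPA device's features when one is present.
 */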

int
rte_vhost_driver_set_protocol_features(const char *path,
		uint64_t protocol_features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->protocol_features = protocol_features;
	pthread_mutex_unlock(&vhost_user.mutex);
	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
				&vdpa_protocol_features) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa protocol features.\n",
				path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	vdpa_dev = vsocket->vdpa_dev;
	if (!vdpa_dev) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa queue number.\n",
				path);
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	vsocket->path = NULL;
	free(vsocket);
}


/*
 * Register a new vhost-user socket; here we act as server
 * (the default case), or as client when the RTE_VHOST_USER_CLIENT
 * flag is set.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		VHOST_LOG_CONFIG(ERR, "(%s) the number of vhost sockets reaches maximum\n",
			path);
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to copy socket path string\n", path);
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to init connection mutex\n", path);
		goto out_free;
	}
	vsocket->vdpa_dev = NULL;
	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;

	if (vsocket->async_copy &&
		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
		VHOST_LOG_CONFIG(ERR, "(%s) async copy with IOMMU or post-copy not supported\n",
			path);
		goto out_mutex;
	}

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about the features the builtin virtio
	 * net driver (virtio_net.c) supports, thus it's not possible for
	 * them to invoke rte_vhost_driver_set_features(). To work around
	 * this, we set it unconditionally here. If an application wants to
	 * implement another vhost-user driver (say SCSI), it should call
	 * rte_vhost_driver_set_features(), which will overwrite the
	 * following two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;

	if (vsocket->async_copy) {
		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
		VHOST_LOG_CONFIG(INFO, "(%s) logging feature is disabled in async copy mode\n",
			path);
	}

	/*
	 * In linear-buffer mode without external buffers, a buffer from
	 * the guest cannot be received if it does not fit in a single
	 * mbuf, which is likely when segmentation offloading is enabled.
	 */
	if (vsocket->linearbuf && !vsocket->extbuf) {
		uint64_t seg_offload_features =
				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
				(1ULL << VIRTIO_NET_F_HOST_UFO);

		VHOST_LOG_CONFIG(INFO, "(%s) Linear buffers requested without external buffers,\n",
			path);
		VHOST_LOG_CONFIG(INFO, "(%s) disabling host segmentation offloading support\n",
			path);
		vsocket->supported_features &= ~seg_offload_features;
		vsocket->features &= ~seg_offload_features;
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		VHOST_LOG_CONFIG(ERR, "(%s) Postcopy requested but not compiled\n", path);
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0) {
		goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to destroy connection mutex\n", path);
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
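
/*
 * Illustrative sketch (not part of this file): the typical registration
 * flow an application follows. The socket path and the callback names
 * are assumptions; error handling is omitted.
 *
 *	static const struct rte_vhost_device_ops ops = {
 *		.new_device = my_new_device,		// hypothetical
 *		.destroy_device = my_destroy_device,	// hypothetical
 *	};
 *
 *	rte_vhost_driver_register("/tmp/vhost.sock", 0);
 *	rte_vhost_driver_callback_register("/tmp/vhost.sock", &ops);
 *	rte_vhost_driver_start("/tmp/vhost.sock");
 */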

static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	int found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_server) {
			/*
			 * If the read/write callback is executing, release
			 * vhost_user's mutex lock and try again, since the
			 * callback may need that mutex.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
		     conn != NULL;
		     conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If the read/write callback is executing, release
			 * vsocket's conn_mutex and vhost_user's mutex locks
			 * and try again, since the callback may need both
			 * of them.
			 */
			if (fdset_try_del(&vhost_user.fdset,
					conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_LOG_CONFIG(INFO, "(%s) free connfd %d\n", path, conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove a device to/from a data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct rte_vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/*
		 * Create a pipe that the poll loop waits on; writing to it
		 * notifies the dispatch thread to rebuild its poll wait list.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_LOG_CONFIG(ERR, "(%s) failed to create pipe for vhost fdset\n", path);
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			VHOST_LOG_CONFIG(ERR, "(%s) failed to create fdset handling thread", path);

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}