xref: /f-stack/dpdk/lib/librte_vhost/socket.c (revision e2391e5e)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	char *path;
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	bool reconnect;
	bool dequeue_zero_copy;
	bool iommu_support;
	bool use_builtin_virtio_net;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits that remain after
	 * rte_vhost_driver_disable_features()/rte_vhost_driver_enable_features()
	 * have been applied; it is the final set used for vhost-user
	 * feature negotiation. For example, if supported_features is
	 * A|B|C and the application disables B, features becomes A|C.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/*
	 * Device id to identify a specific backend device.
	 * It's set to -1 for the default software implementation.
	 * If valid, a socket can have one connection only.
	 */
	int vdpa_dev_id;

	struct vhost_device_ops const *notify_ops;
};

struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *dat, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * Return the number of bytes read on success, or a negative value on
 * failure. *fd_num is updated with the number of fds received.
 */
int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len  = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "truncated msg\n");
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}
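
/*
 * Illustrative sketch (not part of the library): how a caller might
 * receive a message plus ancillary fds with read_fd_message(). The
 * buffer size and fd count below are hypothetical.
 *
 *	char buf[256];
 *	int fds[8], fd_num;
 *	int n = read_fd_message(sockfd, buf, sizeof(buf), fds, 8, &fd_num);
 *	if (n > 0) {
 *		// n bytes of payload are in buf; fds[0..fd_num-1] hold any
 *		// SCM_RIGHTS descriptors, and the remaining slots are -1.
 *	}
 */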

int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
		return ret;
	}

	return ret;
}
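
/*
 * Illustrative sketch (not part of the library): passing one descriptor
 * alongside a payload with send_fd_message(). "payload" and "memfd" are
 * hypothetical names.
 *
 *	int memfd = ...;	// an fd to share with the peer
 *	ret = send_fd_message(sockfd, payload, payload_len, &memfd, 1);
 *	// To send without ancillary fds, pass fds = NULL and fd_num = 0.
 */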

static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1)
		goto err;

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id);

	if (vsocket->dequeue_zero_copy)
		vhost_enable_dequeue_zero_copy(vid);

	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to add vhost user connection with fd %d\n",
				fd);
			goto err_cleanup;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add fd %d into vhost server fdset\n",
			fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err_cleanup;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	fdset_pipe_notify(&vhost_user.fdset);
	return;

err_cleanup:
	vhost_destroy_device(vid);
err:
	free(conn);
	close(fd);
}

/* Callback invoked when there is a new vhost-user connection from a client. */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
	vhost_user_add_connection(fd, vsocket);
}

static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}
	}
}

static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
		vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-user: can't set nonblocking mode for socket, fd: "
			"%d (%s)\n", fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind() may fail if a socket file with the same name already
	 * exists. The library obviously should not delete a file provided
	 * by the user, since we cannot be sure it is not being used by
	 * another application. Moreover, many applications form socket
	 * names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to bind to %s: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		  NULL, vsocket);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add listen fd %d to vhost server fdset\n",
			fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}
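
/*
 * Illustrative sketch (application side, not part of the library): since
 * the library refuses to delete an existing socket file, a server-mode
 * application typically removes a stale path itself before registering.
 * "path" is whatever the application passes to rte_vhost_driver_register().
 *
 *	unlink(path);	// ignore ENOENT; ensure no other process uses it
 *	if (rte_vhost_driver_register(path, 0) < 0)
 *		// handle error
 */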

struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;

static int
vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"can't get flags for connfd %d\n", fd);
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"can't disable nonblocking on fd %d\n", fd);
		return -2;
	}
	return 0;
}

static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equivalent of TAILQ_FOREACH_SAFE, which does not
		 * exist on all platforms.
		 */
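		/*
		 * For reference, the BSD macro this loop mirrors is
		 * roughly (a sketch; it appears in <sys/queue.h> variants
		 * that provide it):
		 *
		 *	#define TAILQ_FOREACH_SAFE(var, head, field, tvar)	\
		 *		for ((var) = TAILQ_FIRST((head));		\
		 *		    (var) &&					\
		 *		    ((tvar) = TAILQ_NEXT((var), field), 1);	\
		 *		    (var) = (tvar))
		 */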
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				close(reconn->fd);
				RTE_LOG(ERR, VHOST_CONFIG,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			RTE_LOG(INFO, VHOST_CONFIG,
				"%s: connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}

static int
vhost_user_reconnect_init(void)
{
	int ret;

	/* pthread_mutex_init() returns a positive error number on failure */
	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	if (ret != 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex\n");
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread\n");
		if (pthread_mutex_destroy(&reconn_list.mutex)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to destroy reconnect mutex\n");
		}
	}

	return ret;
}

static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
					  sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	RTE_LOG(WARNING, VHOST_CONFIG,
		"failed to connect to %s: %s\n",
		path, strerror(errno));

	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to allocate memory for reconnect\n");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	if (path == NULL)
		return NULL;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

int
rte_vhost_driver_attach_vdpa_device(const char *path, int did)
{
	struct vhost_user_socket *vsocket;

	if (rte_vdpa_get_device(did) == NULL || path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev_id = did;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev_id = -1;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_vdpa_device_id(const char *path)
{
	struct vhost_user_socket *vsocket;
	int did = -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		did = vsocket->vdpa_dev_id;
	pthread_mutex_unlock(&vhost_user.mutex);

	return did;
}

int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}
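
/*
 * Illustrative sketch (application side): masking out a single feature
 * bit before starting the driver. VIRTIO_NET_F_MRG_RXBUF is a standard
 * virtio-net feature bit; "path" is whatever was registered.
 *
 *	rte_vhost_driver_disable_features(path,
 *			1ULL << VIRTIO_NET_F_MRG_RXBUF);
 */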

int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_features) {
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(did,
				&vdpa_protocol_features) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa protocol features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa queue number "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket == NULL)
		return;

	free(vsocket->path);
	vsocket->path = NULL;
	free(vsocket);
}

/*
 * Register a new vhost-user socket; here we could act as a server
 * (the default case) or as a client (when the RTE_VHOST_USER_CLIENT
 * flag is set).
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of vhost sockets has reached the maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To work around that,
	 * we set it unconditionally here. If the application wants to
	 * implement another vhost-user driver (say, SCSI), it should call
	 * rte_vhost_driver_set_features(), which will overwrite the
	 * following two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;

	/*
	 * Dequeue zero copy cannot guarantee that descriptors are returned
	 * in order. It also requires that the guest memory is populated,
	 * which is not compatible with postcopy.
	 */
	if (vsocket->dequeue_zero_copy) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
		vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);

		RTE_LOG(INFO, VHOST_CONFIG,
			"Dequeue zero copy requested, disabling postcopy support\n");
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		RTE_LOG(ERR, VHOST_CONFIG,
			"Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0)
		goto out_mutex;

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
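
/*
 * Illustrative sketch (application side, not part of the library): the
 * typical registration sequence for the builtin net backend. "my_ops"
 * is a hypothetical struct vhost_device_ops provided by the application.
 *
 *	if (rte_vhost_driver_register(path, RTE_VHOST_USER_CLIENT) < 0)
 *		goto fail;
 *	if (rte_vhost_driver_callback_register(path, &my_ops) < 0)
 *		goto fail;
 *	if (rte_vhost_driver_start(path) < 0)
 *		goto fail;
 */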

static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	bool found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If the read/write callback is executing,
				 * release conn_mutex and try again, since
				 * the callback may itself need conn_mutex.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				RTE_LOG(INFO, VHOST_CONFIG,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				fdset_del(&vhost_user.fdset,
						vsocket->socket_fd);
				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove devices to/from a data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

struct vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/*
		 * Create a pipe that the poll loop waits on; writing to
		 * it wakes poll() so the wait list can be rebuilt.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create fdset handling thread\n");

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}