xref: /dpdk/lib/vhost/socket.c (revision 30a1de10)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2010-2016 Intel Corporation
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #include <stdint.h>
699a2dd95SBruce Richardson #include <stdio.h>
799a2dd95SBruce Richardson #include <limits.h>
899a2dd95SBruce Richardson #include <stdlib.h>
999a2dd95SBruce Richardson #include <unistd.h>
1099a2dd95SBruce Richardson #include <string.h>
1199a2dd95SBruce Richardson #include <sys/socket.h>
1299a2dd95SBruce Richardson #include <sys/un.h>
1399a2dd95SBruce Richardson #include <sys/queue.h>
1499a2dd95SBruce Richardson #include <errno.h>
1599a2dd95SBruce Richardson #include <fcntl.h>
1699a2dd95SBruce Richardson #include <pthread.h>
1799a2dd95SBruce Richardson 
1899a2dd95SBruce Richardson #include <rte_log.h>
1999a2dd95SBruce Richardson 
2099a2dd95SBruce Richardson #include "fd_man.h"
2199a2dd95SBruce Richardson #include "vhost.h"
2299a2dd95SBruce Richardson #include "vhost_user.h"
2399a2dd95SBruce Richardson 
2499a2dd95SBruce Richardson 
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Per-socket state: one vhost_user_socket is created for each call to
 * rte_vhost_driver_register().
 */
struct vhost_user_socket {
	/* Connections opened on this socket; accessed under conn_mutex. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;

	char *path;             /* socket path, also used as log prefix */
	int socket_fd;
	struct sockaddr_un un;  /* AF_UNIX address filled in from path */

	bool is_server;         /* server (listen) vs. client (connect) mode */
	bool reconnect;         /* client mode: retry the connection */
	bool iommu_support;
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits left after
	 * rte_vhost_driver_features_disable/enable() were applied; these
	 * are the bits finally used for vhost-user features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/* vDPA device attached to new connections of this socket, if any. */
	struct rte_vdpa_device *vdpa_dev;

	/* Application callbacks (new_connection, destroy_connection, ...). */
	struct rte_vhost_device_ops const *notify_ops;
};
6299a2dd95SBruce Richardson 
/*
 * One established vhost-user connection. Instances are linked into the
 * conn_list of the socket they belong to.
 */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket; /* owning socket */
	int connfd;                        /* connected socket descriptor */
	int vid;                           /* vhost device id of this connection */

	TAILQ_ENTRY(vhost_user_connection) next;
};
7099a2dd95SBruce Richardson 
7199a2dd95SBruce Richardson #define MAX_VHOST_SOCKET 1024
7299a2dd95SBruce Richardson struct vhost_user {
7399a2dd95SBruce Richardson 	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
7499a2dd95SBruce Richardson 	struct fdset fdset;
7599a2dd95SBruce Richardson 	int vsocket_cnt;
7699a2dd95SBruce Richardson 	pthread_mutex_t mutex;
7799a2dd95SBruce Richardson };
7899a2dd95SBruce Richardson 
/* Backlog passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations for helpers referenced before their definition. */
static void vhost_user_server_new_connection(int fd, void *dat, int *remove);
static void vhost_user_read_cb(int connfd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
8599a2dd95SBruce Richardson 
8699a2dd95SBruce Richardson static struct vhost_user vhost_user = {
8799a2dd95SBruce Richardson 	.fdset = {
8899a2dd95SBruce Richardson 		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
8999a2dd95SBruce Richardson 		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
9099a2dd95SBruce Richardson 		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
9199a2dd95SBruce Richardson 		.num = 0
9299a2dd95SBruce Richardson 	},
9399a2dd95SBruce Richardson 	.vsocket_cnt = 0,
9499a2dd95SBruce Richardson 	.mutex = PTHREAD_MUTEX_INITIALIZER,
9599a2dd95SBruce Richardson };
9699a2dd95SBruce Richardson 
9799a2dd95SBruce Richardson /*
9899a2dd95SBruce Richardson  * return bytes# of read on success or negative val on failure. Update fdnum
9999a2dd95SBruce Richardson  * with number of fds read.
10099a2dd95SBruce Richardson  */
10199a2dd95SBruce Richardson int
read_fd_message(char * ifname,int sockfd,char * buf,int buflen,int * fds,int max_fds,int * fd_num)102c85c35b1SMaxime Coquelin read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
10399a2dd95SBruce Richardson 		int *fd_num)
10499a2dd95SBruce Richardson {
10599a2dd95SBruce Richardson 	struct iovec iov;
10699a2dd95SBruce Richardson 	struct msghdr msgh;
10799a2dd95SBruce Richardson 	char control[CMSG_SPACE(max_fds * sizeof(int))];
10899a2dd95SBruce Richardson 	struct cmsghdr *cmsg;
10999a2dd95SBruce Richardson 	int got_fds = 0;
11099a2dd95SBruce Richardson 	int ret;
11199a2dd95SBruce Richardson 
11299a2dd95SBruce Richardson 	*fd_num = 0;
11399a2dd95SBruce Richardson 
11499a2dd95SBruce Richardson 	memset(&msgh, 0, sizeof(msgh));
11599a2dd95SBruce Richardson 	iov.iov_base = buf;
11699a2dd95SBruce Richardson 	iov.iov_len  = buflen;
11799a2dd95SBruce Richardson 
11899a2dd95SBruce Richardson 	msgh.msg_iov = &iov;
11999a2dd95SBruce Richardson 	msgh.msg_iovlen = 1;
12099a2dd95SBruce Richardson 	msgh.msg_control = control;
12199a2dd95SBruce Richardson 	msgh.msg_controllen = sizeof(control);
12299a2dd95SBruce Richardson 
12399a2dd95SBruce Richardson 	ret = recvmsg(sockfd, &msgh, 0);
12499a2dd95SBruce Richardson 	if (ret <= 0) {
12599a2dd95SBruce Richardson 		if (ret)
126c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(ERR, "(%s) recvmsg failed on fd %d (%s)\n",
127c85c35b1SMaxime Coquelin 					ifname, sockfd, strerror(errno));
12899a2dd95SBruce Richardson 		return ret;
12999a2dd95SBruce Richardson 	}
13099a2dd95SBruce Richardson 
13199a2dd95SBruce Richardson 	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
132c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) truncated msg (fd %d)\n", ifname, sockfd);
13399a2dd95SBruce Richardson 		return -1;
13499a2dd95SBruce Richardson 	}
13599a2dd95SBruce Richardson 
13699a2dd95SBruce Richardson 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
13799a2dd95SBruce Richardson 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
13899a2dd95SBruce Richardson 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
13999a2dd95SBruce Richardson 			(cmsg->cmsg_type == SCM_RIGHTS)) {
14099a2dd95SBruce Richardson 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
14199a2dd95SBruce Richardson 			*fd_num = got_fds;
14299a2dd95SBruce Richardson 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
14399a2dd95SBruce Richardson 			break;
14499a2dd95SBruce Richardson 		}
14599a2dd95SBruce Richardson 	}
14699a2dd95SBruce Richardson 
14799a2dd95SBruce Richardson 	/* Clear out unused file descriptors */
14899a2dd95SBruce Richardson 	while (got_fds < max_fds)
14999a2dd95SBruce Richardson 		fds[got_fds++] = -1;
15099a2dd95SBruce Richardson 
15199a2dd95SBruce Richardson 	return ret;
15299a2dd95SBruce Richardson }
15399a2dd95SBruce Richardson 
15499a2dd95SBruce Richardson int
send_fd_message(char * ifname,int sockfd,char * buf,int buflen,int * fds,int fd_num)155c85c35b1SMaxime Coquelin send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
15699a2dd95SBruce Richardson {
15799a2dd95SBruce Richardson 
15899a2dd95SBruce Richardson 	struct iovec iov;
15999a2dd95SBruce Richardson 	struct msghdr msgh;
16099a2dd95SBruce Richardson 	size_t fdsize = fd_num * sizeof(int);
16199a2dd95SBruce Richardson 	char control[CMSG_SPACE(fdsize)];
16299a2dd95SBruce Richardson 	struct cmsghdr *cmsg;
16399a2dd95SBruce Richardson 	int ret;
16499a2dd95SBruce Richardson 
16599a2dd95SBruce Richardson 	memset(&msgh, 0, sizeof(msgh));
16699a2dd95SBruce Richardson 	iov.iov_base = buf;
16799a2dd95SBruce Richardson 	iov.iov_len = buflen;
16899a2dd95SBruce Richardson 
16999a2dd95SBruce Richardson 	msgh.msg_iov = &iov;
17099a2dd95SBruce Richardson 	msgh.msg_iovlen = 1;
17199a2dd95SBruce Richardson 
17299a2dd95SBruce Richardson 	if (fds && fd_num > 0) {
17399a2dd95SBruce Richardson 		msgh.msg_control = control;
17499a2dd95SBruce Richardson 		msgh.msg_controllen = sizeof(control);
17599a2dd95SBruce Richardson 		cmsg = CMSG_FIRSTHDR(&msgh);
17699a2dd95SBruce Richardson 		if (cmsg == NULL) {
177c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(ERR, "(%s) cmsg == NULL\n", ifname);
17899a2dd95SBruce Richardson 			errno = EINVAL;
17999a2dd95SBruce Richardson 			return -1;
18099a2dd95SBruce Richardson 		}
18199a2dd95SBruce Richardson 		cmsg->cmsg_len = CMSG_LEN(fdsize);
18299a2dd95SBruce Richardson 		cmsg->cmsg_level = SOL_SOCKET;
18399a2dd95SBruce Richardson 		cmsg->cmsg_type = SCM_RIGHTS;
18499a2dd95SBruce Richardson 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
18599a2dd95SBruce Richardson 	} else {
18699a2dd95SBruce Richardson 		msgh.msg_control = NULL;
18799a2dd95SBruce Richardson 		msgh.msg_controllen = 0;
18899a2dd95SBruce Richardson 	}
18999a2dd95SBruce Richardson 
19099a2dd95SBruce Richardson 	do {
19199a2dd95SBruce Richardson 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
19299a2dd95SBruce Richardson 	} while (ret < 0 && errno == EINTR);
19399a2dd95SBruce Richardson 
19499a2dd95SBruce Richardson 	if (ret < 0) {
195c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) sendmsg error on fd %d (%s)\n",
196c85c35b1SMaxime Coquelin 				ifname, sockfd, strerror(errno));
19799a2dd95SBruce Richardson 		return ret;
19899a2dd95SBruce Richardson 	}
19999a2dd95SBruce Richardson 
20099a2dd95SBruce Richardson 	return ret;
20199a2dd95SBruce Richardson }
20299a2dd95SBruce Richardson 
20399a2dd95SBruce Richardson static void
vhost_user_add_connection(int fd,struct vhost_user_socket * vsocket)20499a2dd95SBruce Richardson vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
20599a2dd95SBruce Richardson {
20699a2dd95SBruce Richardson 	int vid;
20799a2dd95SBruce Richardson 	size_t size;
20899a2dd95SBruce Richardson 	struct vhost_user_connection *conn;
20999a2dd95SBruce Richardson 	int ret;
21099a2dd95SBruce Richardson 	struct virtio_net *dev;
21199a2dd95SBruce Richardson 
21299a2dd95SBruce Richardson 	if (vsocket == NULL)
21399a2dd95SBruce Richardson 		return;
21499a2dd95SBruce Richardson 
21599a2dd95SBruce Richardson 	conn = malloc(sizeof(*conn));
21699a2dd95SBruce Richardson 	if (conn == NULL) {
21799a2dd95SBruce Richardson 		close(fd);
21899a2dd95SBruce Richardson 		return;
21999a2dd95SBruce Richardson 	}
22099a2dd95SBruce Richardson 
22199a2dd95SBruce Richardson 	vid = vhost_new_device();
22299a2dd95SBruce Richardson 	if (vid == -1) {
22399a2dd95SBruce Richardson 		goto err;
22499a2dd95SBruce Richardson 	}
22599a2dd95SBruce Richardson 
22699a2dd95SBruce Richardson 	size = strnlen(vsocket->path, PATH_MAX);
22799a2dd95SBruce Richardson 	vhost_set_ifname(vid, vsocket->path, size);
22899a2dd95SBruce Richardson 
229ca7036b4SDavid Marchand 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
230ca7036b4SDavid Marchand 		vsocket->net_compliant_ol_flags);
23199a2dd95SBruce Richardson 
23299a2dd95SBruce Richardson 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
23399a2dd95SBruce Richardson 
23499a2dd95SBruce Richardson 	if (vsocket->extbuf)
23599a2dd95SBruce Richardson 		vhost_enable_extbuf(vid);
23699a2dd95SBruce Richardson 
23799a2dd95SBruce Richardson 	if (vsocket->linearbuf)
23899a2dd95SBruce Richardson 		vhost_enable_linearbuf(vid);
23999a2dd95SBruce Richardson 
24099a2dd95SBruce Richardson 	if (vsocket->async_copy) {
24199a2dd95SBruce Richardson 		dev = get_device(vid);
24299a2dd95SBruce Richardson 
24399a2dd95SBruce Richardson 		if (dev)
24499a2dd95SBruce Richardson 			dev->async_copy = 1;
24599a2dd95SBruce Richardson 	}
24699a2dd95SBruce Richardson 
247c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(INFO, "(%s) new device, handle is %d\n", vsocket->path, vid);
24899a2dd95SBruce Richardson 
24999a2dd95SBruce Richardson 	if (vsocket->notify_ops->new_connection) {
25099a2dd95SBruce Richardson 		ret = vsocket->notify_ops->new_connection(vid);
25199a2dd95SBruce Richardson 		if (ret < 0) {
25299a2dd95SBruce Richardson 			VHOST_LOG_CONFIG(ERR,
253c85c35b1SMaxime Coquelin 				"(%s) failed to add vhost user connection with fd %d\n",
254c85c35b1SMaxime Coquelin 				vsocket->path, fd);
25599a2dd95SBruce Richardson 			goto err_cleanup;
25699a2dd95SBruce Richardson 		}
25799a2dd95SBruce Richardson 	}
25899a2dd95SBruce Richardson 
25999a2dd95SBruce Richardson 	conn->connfd = fd;
26099a2dd95SBruce Richardson 	conn->vsocket = vsocket;
26199a2dd95SBruce Richardson 	conn->vid = vid;
26299a2dd95SBruce Richardson 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
26399a2dd95SBruce Richardson 			NULL, conn);
26499a2dd95SBruce Richardson 	if (ret < 0) {
265c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to add fd %d into vhost server fdset\n",
266c85c35b1SMaxime Coquelin 			vsocket->path, fd);
26799a2dd95SBruce Richardson 
26899a2dd95SBruce Richardson 		if (vsocket->notify_ops->destroy_connection)
26999a2dd95SBruce Richardson 			vsocket->notify_ops->destroy_connection(conn->vid);
27099a2dd95SBruce Richardson 
27199a2dd95SBruce Richardson 		goto err_cleanup;
27299a2dd95SBruce Richardson 	}
27399a2dd95SBruce Richardson 
27499a2dd95SBruce Richardson 	pthread_mutex_lock(&vsocket->conn_mutex);
27599a2dd95SBruce Richardson 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
27699a2dd95SBruce Richardson 	pthread_mutex_unlock(&vsocket->conn_mutex);
27799a2dd95SBruce Richardson 
27899a2dd95SBruce Richardson 	fdset_pipe_notify(&vhost_user.fdset);
27999a2dd95SBruce Richardson 	return;
28099a2dd95SBruce Richardson 
28199a2dd95SBruce Richardson err_cleanup:
28299a2dd95SBruce Richardson 	vhost_destroy_device(vid);
28399a2dd95SBruce Richardson err:
28499a2dd95SBruce Richardson 	free(conn);
28599a2dd95SBruce Richardson 	close(fd);
28699a2dd95SBruce Richardson }
28799a2dd95SBruce Richardson 
28899a2dd95SBruce Richardson /* call back when there is new vhost-user connection from client  */
28999a2dd95SBruce Richardson static void
vhost_user_server_new_connection(int fd,void * dat,int * remove __rte_unused)29099a2dd95SBruce Richardson vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
29199a2dd95SBruce Richardson {
29299a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket = dat;
29399a2dd95SBruce Richardson 
29499a2dd95SBruce Richardson 	fd = accept(fd, NULL, NULL);
29599a2dd95SBruce Richardson 	if (fd < 0)
29699a2dd95SBruce Richardson 		return;
29799a2dd95SBruce Richardson 
298c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(INFO, "(%s) new vhost user connection is %d\n",
299c85c35b1SMaxime Coquelin 			vsocket->path, fd);
30099a2dd95SBruce Richardson 	vhost_user_add_connection(fd, vsocket);
30199a2dd95SBruce Richardson }
30299a2dd95SBruce Richardson 
30399a2dd95SBruce Richardson static void
vhost_user_read_cb(int connfd,void * dat,int * remove)30499a2dd95SBruce Richardson vhost_user_read_cb(int connfd, void *dat, int *remove)
30599a2dd95SBruce Richardson {
30699a2dd95SBruce Richardson 	struct vhost_user_connection *conn = dat;
30799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket = conn->vsocket;
30899a2dd95SBruce Richardson 	int ret;
30999a2dd95SBruce Richardson 
31099a2dd95SBruce Richardson 	ret = vhost_user_msg_handler(conn->vid, connfd);
31199a2dd95SBruce Richardson 	if (ret < 0) {
31299a2dd95SBruce Richardson 		struct virtio_net *dev = get_device(conn->vid);
31399a2dd95SBruce Richardson 
31499a2dd95SBruce Richardson 		close(connfd);
31599a2dd95SBruce Richardson 		*remove = 1;
31699a2dd95SBruce Richardson 
31799a2dd95SBruce Richardson 		if (dev)
31899a2dd95SBruce Richardson 			vhost_destroy_device_notify(dev);
31999a2dd95SBruce Richardson 
32099a2dd95SBruce Richardson 		if (vsocket->notify_ops->destroy_connection)
32199a2dd95SBruce Richardson 			vsocket->notify_ops->destroy_connection(conn->vid);
32299a2dd95SBruce Richardson 
32399a2dd95SBruce Richardson 		vhost_destroy_device(conn->vid);
32499a2dd95SBruce Richardson 
32599a2dd95SBruce Richardson 		if (vsocket->reconnect) {
32699a2dd95SBruce Richardson 			create_unix_socket(vsocket);
32799a2dd95SBruce Richardson 			vhost_user_start_client(vsocket);
32899a2dd95SBruce Richardson 		}
32999a2dd95SBruce Richardson 
33099a2dd95SBruce Richardson 		pthread_mutex_lock(&vsocket->conn_mutex);
33199a2dd95SBruce Richardson 		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
33299a2dd95SBruce Richardson 		pthread_mutex_unlock(&vsocket->conn_mutex);
33399a2dd95SBruce Richardson 
33499a2dd95SBruce Richardson 		free(conn);
33599a2dd95SBruce Richardson 	}
33699a2dd95SBruce Richardson }
33799a2dd95SBruce Richardson 
33899a2dd95SBruce Richardson static int
create_unix_socket(struct vhost_user_socket * vsocket)33999a2dd95SBruce Richardson create_unix_socket(struct vhost_user_socket *vsocket)
34099a2dd95SBruce Richardson {
34199a2dd95SBruce Richardson 	int fd;
34299a2dd95SBruce Richardson 	struct sockaddr_un *un = &vsocket->un;
34399a2dd95SBruce Richardson 
34499a2dd95SBruce Richardson 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
34599a2dd95SBruce Richardson 	if (fd < 0)
34699a2dd95SBruce Richardson 		return -1;
347c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(INFO, "(%s) vhost-user %s: socket created, fd: %d\n",
348c85c35b1SMaxime Coquelin 		vsocket->path, vsocket->is_server ? "server" : "client", fd);
34999a2dd95SBruce Richardson 
35099a2dd95SBruce Richardson 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
35199a2dd95SBruce Richardson 		VHOST_LOG_CONFIG(ERR,
352c85c35b1SMaxime Coquelin 			"(%s) vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
353c85c35b1SMaxime Coquelin 			vsocket->path, fd, strerror(errno));
35499a2dd95SBruce Richardson 		close(fd);
35599a2dd95SBruce Richardson 		return -1;
35699a2dd95SBruce Richardson 	}
35799a2dd95SBruce Richardson 
35899a2dd95SBruce Richardson 	memset(un, 0, sizeof(*un));
35999a2dd95SBruce Richardson 	un->sun_family = AF_UNIX;
36099a2dd95SBruce Richardson 	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
36199a2dd95SBruce Richardson 	un->sun_path[sizeof(un->sun_path) - 1] = '\0';
36299a2dd95SBruce Richardson 
36399a2dd95SBruce Richardson 	vsocket->socket_fd = fd;
36499a2dd95SBruce Richardson 	return 0;
36599a2dd95SBruce Richardson }
36699a2dd95SBruce Richardson 
36799a2dd95SBruce Richardson static int
vhost_user_start_server(struct vhost_user_socket * vsocket)36899a2dd95SBruce Richardson vhost_user_start_server(struct vhost_user_socket *vsocket)
36999a2dd95SBruce Richardson {
37099a2dd95SBruce Richardson 	int ret;
37199a2dd95SBruce Richardson 	int fd = vsocket->socket_fd;
37299a2dd95SBruce Richardson 	const char *path = vsocket->path;
37399a2dd95SBruce Richardson 
37499a2dd95SBruce Richardson 	/*
37599a2dd95SBruce Richardson 	 * bind () may fail if the socket file with the same name already
37699a2dd95SBruce Richardson 	 * exists. But the library obviously should not delete the file
37799a2dd95SBruce Richardson 	 * provided by the user, since we can not be sure that it is not
37899a2dd95SBruce Richardson 	 * being used by other applications. Moreover, many applications form
37999a2dd95SBruce Richardson 	 * socket names based on user input, which is prone to errors.
38099a2dd95SBruce Richardson 	 *
38199a2dd95SBruce Richardson 	 * The user must ensure that the socket does not exist before
38299a2dd95SBruce Richardson 	 * registering the vhost driver in server mode.
38399a2dd95SBruce Richardson 	 */
38499a2dd95SBruce Richardson 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
38599a2dd95SBruce Richardson 	if (ret < 0) {
386c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to bind: %s; remove it and try again\n",
38799a2dd95SBruce Richardson 			path, strerror(errno));
38899a2dd95SBruce Richardson 		goto err;
38999a2dd95SBruce Richardson 	}
390c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(INFO, "(%s) binding succeeded\n", path);
39199a2dd95SBruce Richardson 
39299a2dd95SBruce Richardson 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
39399a2dd95SBruce Richardson 	if (ret < 0)
39499a2dd95SBruce Richardson 		goto err;
39599a2dd95SBruce Richardson 
39699a2dd95SBruce Richardson 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
39799a2dd95SBruce Richardson 		  NULL, vsocket);
39899a2dd95SBruce Richardson 	if (ret < 0) {
39999a2dd95SBruce Richardson 		VHOST_LOG_CONFIG(ERR,
400c85c35b1SMaxime Coquelin 			"(%s) failed to add listen fd %d to vhost server fdset\n",
401c85c35b1SMaxime Coquelin 			path, fd);
40299a2dd95SBruce Richardson 		goto err;
40399a2dd95SBruce Richardson 	}
40499a2dd95SBruce Richardson 
40599a2dd95SBruce Richardson 	return 0;
40699a2dd95SBruce Richardson 
40799a2dd95SBruce Richardson err:
40899a2dd95SBruce Richardson 	close(fd);
40999a2dd95SBruce Richardson 	return -1;
41099a2dd95SBruce Richardson }
41199a2dd95SBruce Richardson 
/* One pending client connection that the reconnect thread keeps retrying. */
struct vhost_user_reconnect {
	struct sockaddr_un un;             /* address to connect to */
	int fd;                            /* socket used for the attempts */
	struct vhost_user_socket *vsocket; /* socket the connection is for */

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);

/* Queue of pending reconnections together with the mutex guarding it. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

static struct vhost_user_reconnect_list reconn_list;
/* Thread id filled in when the reconnect thread is created. */
static pthread_t reconn_tid;
42899a2dd95SBruce Richardson 
42999a2dd95SBruce Richardson static int
vhost_user_connect_nonblock(char * path,int fd,struct sockaddr * un,size_t sz)430c85c35b1SMaxime Coquelin vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
43199a2dd95SBruce Richardson {
43299a2dd95SBruce Richardson 	int ret, flags;
43399a2dd95SBruce Richardson 
43499a2dd95SBruce Richardson 	ret = connect(fd, un, sz);
43599a2dd95SBruce Richardson 	if (ret < 0 && errno != EISCONN)
43699a2dd95SBruce Richardson 		return -1;
43799a2dd95SBruce Richardson 
43899a2dd95SBruce Richardson 	flags = fcntl(fd, F_GETFL, 0);
43999a2dd95SBruce Richardson 	if (flags < 0) {
440c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) can't get flags for connfd %d (%s)\n",
441c85c35b1SMaxime Coquelin 				path, fd, strerror(errno));
44299a2dd95SBruce Richardson 		return -2;
44399a2dd95SBruce Richardson 	}
44499a2dd95SBruce Richardson 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
445c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) can't disable nonblocking on fd %d\n", path, fd);
44699a2dd95SBruce Richardson 		return -2;
44799a2dd95SBruce Richardson 	}
44899a2dd95SBruce Richardson 	return 0;
44999a2dd95SBruce Richardson }
45099a2dd95SBruce Richardson 
45199a2dd95SBruce Richardson static void *
vhost_user_client_reconnect(void * arg __rte_unused)45299a2dd95SBruce Richardson vhost_user_client_reconnect(void *arg __rte_unused)
45399a2dd95SBruce Richardson {
45499a2dd95SBruce Richardson 	int ret;
45599a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn, *next;
45699a2dd95SBruce Richardson 
45799a2dd95SBruce Richardson 	while (1) {
45899a2dd95SBruce Richardson 		pthread_mutex_lock(&reconn_list.mutex);
45999a2dd95SBruce Richardson 
46099a2dd95SBruce Richardson 		/*
46199a2dd95SBruce Richardson 		 * An equal implementation of TAILQ_FOREACH_SAFE,
46299a2dd95SBruce Richardson 		 * which does not exist on all platforms.
46399a2dd95SBruce Richardson 		 */
46499a2dd95SBruce Richardson 		for (reconn = TAILQ_FIRST(&reconn_list.head);
46599a2dd95SBruce Richardson 		     reconn != NULL; reconn = next) {
46699a2dd95SBruce Richardson 			next = TAILQ_NEXT(reconn, next);
46799a2dd95SBruce Richardson 
468c85c35b1SMaxime Coquelin 			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
46999a2dd95SBruce Richardson 						(struct sockaddr *)&reconn->un,
47099a2dd95SBruce Richardson 						sizeof(reconn->un));
47199a2dd95SBruce Richardson 			if (ret == -2) {
47299a2dd95SBruce Richardson 				close(reconn->fd);
473c85c35b1SMaxime Coquelin 				VHOST_LOG_CONFIG(ERR, "(%s) reconnection for fd %d failed\n",
474c85c35b1SMaxime Coquelin 					reconn->vsocket->path, reconn->fd);
47599a2dd95SBruce Richardson 				goto remove_fd;
47699a2dd95SBruce Richardson 			}
47799a2dd95SBruce Richardson 			if (ret == -1)
47899a2dd95SBruce Richardson 				continue;
47999a2dd95SBruce Richardson 
480c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(INFO, "(%s) connected\n", reconn->vsocket->path);
48199a2dd95SBruce Richardson 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
48299a2dd95SBruce Richardson remove_fd:
48399a2dd95SBruce Richardson 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
48499a2dd95SBruce Richardson 			free(reconn);
48599a2dd95SBruce Richardson 		}
48699a2dd95SBruce Richardson 
48799a2dd95SBruce Richardson 		pthread_mutex_unlock(&reconn_list.mutex);
48899a2dd95SBruce Richardson 		sleep(1);
48999a2dd95SBruce Richardson 	}
49099a2dd95SBruce Richardson 
49199a2dd95SBruce Richardson 	return NULL;
49299a2dd95SBruce Richardson }
49399a2dd95SBruce Richardson 
49499a2dd95SBruce Richardson static int
vhost_user_reconnect_init(void)49599a2dd95SBruce Richardson vhost_user_reconnect_init(void)
49699a2dd95SBruce Richardson {
49799a2dd95SBruce Richardson 	int ret;
49899a2dd95SBruce Richardson 
49999a2dd95SBruce Richardson 	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
50099a2dd95SBruce Richardson 	if (ret < 0) {
501c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "%s: failed to initialize mutex", __func__);
50299a2dd95SBruce Richardson 		return ret;
50399a2dd95SBruce Richardson 	}
50499a2dd95SBruce Richardson 	TAILQ_INIT(&reconn_list.head);
50599a2dd95SBruce Richardson 
50699a2dd95SBruce Richardson 	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
50799a2dd95SBruce Richardson 			     vhost_user_client_reconnect, NULL);
50899a2dd95SBruce Richardson 	if (ret != 0) {
50999a2dd95SBruce Richardson 		VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
510c85c35b1SMaxime Coquelin 		if (pthread_mutex_destroy(&reconn_list.mutex))
511c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(ERR, "%s: failed to destroy reconnect mutex", __func__);
51299a2dd95SBruce Richardson 	}
51399a2dd95SBruce Richardson 
51499a2dd95SBruce Richardson 	return ret;
51599a2dd95SBruce Richardson }
51699a2dd95SBruce Richardson 
51799a2dd95SBruce Richardson static int
vhost_user_start_client(struct vhost_user_socket * vsocket)51899a2dd95SBruce Richardson vhost_user_start_client(struct vhost_user_socket *vsocket)
51999a2dd95SBruce Richardson {
52099a2dd95SBruce Richardson 	int ret;
52199a2dd95SBruce Richardson 	int fd = vsocket->socket_fd;
52299a2dd95SBruce Richardson 	const char *path = vsocket->path;
52399a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn;
52499a2dd95SBruce Richardson 
525c85c35b1SMaxime Coquelin 	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
52699a2dd95SBruce Richardson 					  sizeof(vsocket->un));
52799a2dd95SBruce Richardson 	if (ret == 0) {
52899a2dd95SBruce Richardson 		vhost_user_add_connection(fd, vsocket);
52999a2dd95SBruce Richardson 		return 0;
53099a2dd95SBruce Richardson 	}
53199a2dd95SBruce Richardson 
532c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(WARNING, "(%s) failed to connect: %s\n", path, strerror(errno));
53399a2dd95SBruce Richardson 
53499a2dd95SBruce Richardson 	if (ret == -2 || !vsocket->reconnect) {
53599a2dd95SBruce Richardson 		close(fd);
53699a2dd95SBruce Richardson 		return -1;
53799a2dd95SBruce Richardson 	}
53899a2dd95SBruce Richardson 
539c85c35b1SMaxime Coquelin 	VHOST_LOG_CONFIG(INFO, "(%s) reconnecting...\n", path);
54099a2dd95SBruce Richardson 	reconn = malloc(sizeof(*reconn));
54199a2dd95SBruce Richardson 	if (reconn == NULL) {
542c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for reconnect\n", path);
54399a2dd95SBruce Richardson 		close(fd);
54499a2dd95SBruce Richardson 		return -1;
54599a2dd95SBruce Richardson 	}
54699a2dd95SBruce Richardson 	reconn->un = vsocket->un;
54799a2dd95SBruce Richardson 	reconn->fd = fd;
54899a2dd95SBruce Richardson 	reconn->vsocket = vsocket;
54999a2dd95SBruce Richardson 	pthread_mutex_lock(&reconn_list.mutex);
55099a2dd95SBruce Richardson 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
55199a2dd95SBruce Richardson 	pthread_mutex_unlock(&reconn_list.mutex);
55299a2dd95SBruce Richardson 
55399a2dd95SBruce Richardson 	return 0;
55499a2dd95SBruce Richardson }
55599a2dd95SBruce Richardson 
55699a2dd95SBruce Richardson static struct vhost_user_socket *
find_vhost_user_socket(const char * path)55799a2dd95SBruce Richardson find_vhost_user_socket(const char *path)
55899a2dd95SBruce Richardson {
55999a2dd95SBruce Richardson 	int i;
56099a2dd95SBruce Richardson 
56199a2dd95SBruce Richardson 	if (path == NULL)
56299a2dd95SBruce Richardson 		return NULL;
56399a2dd95SBruce Richardson 
56499a2dd95SBruce Richardson 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
56599a2dd95SBruce Richardson 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
56699a2dd95SBruce Richardson 
56799a2dd95SBruce Richardson 		if (!strcmp(vsocket->path, path))
56899a2dd95SBruce Richardson 			return vsocket;
56999a2dd95SBruce Richardson 	}
57099a2dd95SBruce Richardson 
57199a2dd95SBruce Richardson 	return NULL;
57299a2dd95SBruce Richardson }
57399a2dd95SBruce Richardson 
57499a2dd95SBruce Richardson int
rte_vhost_driver_attach_vdpa_device(const char * path,struct rte_vdpa_device * dev)57599a2dd95SBruce Richardson rte_vhost_driver_attach_vdpa_device(const char *path,
57699a2dd95SBruce Richardson 		struct rte_vdpa_device *dev)
57799a2dd95SBruce Richardson {
57899a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
57999a2dd95SBruce Richardson 
58099a2dd95SBruce Richardson 	if (dev == NULL || path == NULL)
58199a2dd95SBruce Richardson 		return -1;
58299a2dd95SBruce Richardson 
58399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
58499a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
58599a2dd95SBruce Richardson 	if (vsocket)
58699a2dd95SBruce Richardson 		vsocket->vdpa_dev = dev;
58799a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
58899a2dd95SBruce Richardson 
58999a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
59099a2dd95SBruce Richardson }
59199a2dd95SBruce Richardson 
59299a2dd95SBruce Richardson int
rte_vhost_driver_detach_vdpa_device(const char * path)59399a2dd95SBruce Richardson rte_vhost_driver_detach_vdpa_device(const char *path)
59499a2dd95SBruce Richardson {
59599a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
59699a2dd95SBruce Richardson 
59799a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
59899a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
59999a2dd95SBruce Richardson 	if (vsocket)
60099a2dd95SBruce Richardson 		vsocket->vdpa_dev = NULL;
60199a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
60299a2dd95SBruce Richardson 
60399a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
60499a2dd95SBruce Richardson }
60599a2dd95SBruce Richardson 
60699a2dd95SBruce Richardson struct rte_vdpa_device *
rte_vhost_driver_get_vdpa_device(const char * path)60799a2dd95SBruce Richardson rte_vhost_driver_get_vdpa_device(const char *path)
60899a2dd95SBruce Richardson {
60999a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
61099a2dd95SBruce Richardson 	struct rte_vdpa_device *dev = NULL;
61199a2dd95SBruce Richardson 
61299a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
61399a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
61499a2dd95SBruce Richardson 	if (vsocket)
61599a2dd95SBruce Richardson 		dev = vsocket->vdpa_dev;
61699a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
61799a2dd95SBruce Richardson 
61899a2dd95SBruce Richardson 	return dev;
61999a2dd95SBruce Richardson }
62099a2dd95SBruce Richardson 
62199a2dd95SBruce Richardson int
rte_vhost_driver_disable_features(const char * path,uint64_t features)62299a2dd95SBruce Richardson rte_vhost_driver_disable_features(const char *path, uint64_t features)
62399a2dd95SBruce Richardson {
62499a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
62599a2dd95SBruce Richardson 
62699a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
62799a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
62899a2dd95SBruce Richardson 
62999a2dd95SBruce Richardson 	/* Note that use_builtin_virtio_net is not affected by this function
63099a2dd95SBruce Richardson 	 * since callers may want to selectively disable features of the
63199a2dd95SBruce Richardson 	 * built-in vhost net device backend.
63299a2dd95SBruce Richardson 	 */
63399a2dd95SBruce Richardson 
63499a2dd95SBruce Richardson 	if (vsocket)
63599a2dd95SBruce Richardson 		vsocket->features &= ~features;
63699a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
63799a2dd95SBruce Richardson 
63899a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
63999a2dd95SBruce Richardson }
64099a2dd95SBruce Richardson 
64199a2dd95SBruce Richardson int
rte_vhost_driver_enable_features(const char * path,uint64_t features)64299a2dd95SBruce Richardson rte_vhost_driver_enable_features(const char *path, uint64_t features)
64399a2dd95SBruce Richardson {
64499a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
64599a2dd95SBruce Richardson 
64699a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
64799a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
64899a2dd95SBruce Richardson 	if (vsocket) {
64999a2dd95SBruce Richardson 		if ((vsocket->supported_features & features) != features) {
65099a2dd95SBruce Richardson 			/*
65199a2dd95SBruce Richardson 			 * trying to enable features the driver doesn't
65299a2dd95SBruce Richardson 			 * support.
65399a2dd95SBruce Richardson 			 */
65499a2dd95SBruce Richardson 			pthread_mutex_unlock(&vhost_user.mutex);
65599a2dd95SBruce Richardson 			return -1;
65699a2dd95SBruce Richardson 		}
65799a2dd95SBruce Richardson 		vsocket->features |= features;
65899a2dd95SBruce Richardson 	}
65999a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
66099a2dd95SBruce Richardson 
66199a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
66299a2dd95SBruce Richardson }
66399a2dd95SBruce Richardson 
66499a2dd95SBruce Richardson int
rte_vhost_driver_set_features(const char * path,uint64_t features)66599a2dd95SBruce Richardson rte_vhost_driver_set_features(const char *path, uint64_t features)
66699a2dd95SBruce Richardson {
66799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
66899a2dd95SBruce Richardson 
66999a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
67099a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
67199a2dd95SBruce Richardson 	if (vsocket) {
67299a2dd95SBruce Richardson 		vsocket->supported_features = features;
67399a2dd95SBruce Richardson 		vsocket->features = features;
67499a2dd95SBruce Richardson 
67599a2dd95SBruce Richardson 		/* Anyone setting feature bits is implementing their own vhost
67699a2dd95SBruce Richardson 		 * device backend.
67799a2dd95SBruce Richardson 		 */
67899a2dd95SBruce Richardson 		vsocket->use_builtin_virtio_net = false;
67999a2dd95SBruce Richardson 	}
68099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
68199a2dd95SBruce Richardson 
68299a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
68399a2dd95SBruce Richardson }
68499a2dd95SBruce Richardson 
68599a2dd95SBruce Richardson int
rte_vhost_driver_get_features(const char * path,uint64_t * features)68699a2dd95SBruce Richardson rte_vhost_driver_get_features(const char *path, uint64_t *features)
68799a2dd95SBruce Richardson {
68899a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
68999a2dd95SBruce Richardson 	uint64_t vdpa_features;
69099a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
69199a2dd95SBruce Richardson 	int ret = 0;
69299a2dd95SBruce Richardson 
69399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
69499a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
69599a2dd95SBruce Richardson 	if (!vsocket) {
696c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
69799a2dd95SBruce Richardson 		ret = -1;
69899a2dd95SBruce Richardson 		goto unlock_exit;
69999a2dd95SBruce Richardson 	}
70099a2dd95SBruce Richardson 
70199a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
70299a2dd95SBruce Richardson 	if (!vdpa_dev) {
70399a2dd95SBruce Richardson 		*features = vsocket->features;
70499a2dd95SBruce Richardson 		goto unlock_exit;
70599a2dd95SBruce Richardson 	}
70699a2dd95SBruce Richardson 
70799a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
708c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa features for socket file.\n", path);
70999a2dd95SBruce Richardson 		ret = -1;
71099a2dd95SBruce Richardson 		goto unlock_exit;
71199a2dd95SBruce Richardson 	}
71299a2dd95SBruce Richardson 
71399a2dd95SBruce Richardson 	*features = vsocket->features & vdpa_features;
71499a2dd95SBruce Richardson 
71599a2dd95SBruce Richardson unlock_exit:
71699a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
71799a2dd95SBruce Richardson 	return ret;
71899a2dd95SBruce Richardson }
71999a2dd95SBruce Richardson 
72099a2dd95SBruce Richardson int
rte_vhost_driver_set_protocol_features(const char * path,uint64_t protocol_features)72199a2dd95SBruce Richardson rte_vhost_driver_set_protocol_features(const char *path,
72299a2dd95SBruce Richardson 		uint64_t protocol_features)
72399a2dd95SBruce Richardson {
72499a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
72599a2dd95SBruce Richardson 
72699a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
72799a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
72899a2dd95SBruce Richardson 	if (vsocket)
72999a2dd95SBruce Richardson 		vsocket->protocol_features = protocol_features;
73099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
73199a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
73299a2dd95SBruce Richardson }
73399a2dd95SBruce Richardson 
73499a2dd95SBruce Richardson int
rte_vhost_driver_get_protocol_features(const char * path,uint64_t * protocol_features)73599a2dd95SBruce Richardson rte_vhost_driver_get_protocol_features(const char *path,
73699a2dd95SBruce Richardson 		uint64_t *protocol_features)
73799a2dd95SBruce Richardson {
73899a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
73999a2dd95SBruce Richardson 	uint64_t vdpa_protocol_features;
74099a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
74199a2dd95SBruce Richardson 	int ret = 0;
74299a2dd95SBruce Richardson 
74399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
74499a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
74599a2dd95SBruce Richardson 	if (!vsocket) {
746c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
74799a2dd95SBruce Richardson 		ret = -1;
74899a2dd95SBruce Richardson 		goto unlock_exit;
74999a2dd95SBruce Richardson 	}
75099a2dd95SBruce Richardson 
75199a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
75299a2dd95SBruce Richardson 	if (!vdpa_dev) {
75399a2dd95SBruce Richardson 		*protocol_features = vsocket->protocol_features;
75499a2dd95SBruce Richardson 		goto unlock_exit;
75599a2dd95SBruce Richardson 	}
75699a2dd95SBruce Richardson 
75799a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
75899a2dd95SBruce Richardson 				&vdpa_protocol_features) < 0) {
759c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa protocol features.\n",
760c85c35b1SMaxime Coquelin 				path);
76199a2dd95SBruce Richardson 		ret = -1;
76299a2dd95SBruce Richardson 		goto unlock_exit;
76399a2dd95SBruce Richardson 	}
76499a2dd95SBruce Richardson 
76599a2dd95SBruce Richardson 	*protocol_features = vsocket->protocol_features
76699a2dd95SBruce Richardson 		& vdpa_protocol_features;
76799a2dd95SBruce Richardson 
76899a2dd95SBruce Richardson unlock_exit:
76999a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
77099a2dd95SBruce Richardson 	return ret;
77199a2dd95SBruce Richardson }
77299a2dd95SBruce Richardson 
77399a2dd95SBruce Richardson int
rte_vhost_driver_get_queue_num(const char * path,uint32_t * queue_num)77499a2dd95SBruce Richardson rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
77599a2dd95SBruce Richardson {
77699a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
77799a2dd95SBruce Richardson 	uint32_t vdpa_queue_num;
77899a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
77999a2dd95SBruce Richardson 	int ret = 0;
78099a2dd95SBruce Richardson 
78199a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
78299a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
78399a2dd95SBruce Richardson 	if (!vsocket) {
784c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) socket file is not registered yet.\n", path);
78599a2dd95SBruce Richardson 		ret = -1;
78699a2dd95SBruce Richardson 		goto unlock_exit;
78799a2dd95SBruce Richardson 	}
78899a2dd95SBruce Richardson 
78999a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
79099a2dd95SBruce Richardson 	if (!vdpa_dev) {
79199a2dd95SBruce Richardson 		*queue_num = VHOST_MAX_QUEUE_PAIRS;
79299a2dd95SBruce Richardson 		goto unlock_exit;
79399a2dd95SBruce Richardson 	}
79499a2dd95SBruce Richardson 
79599a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
796c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to get vdpa queue number.\n",
797c85c35b1SMaxime Coquelin 				path);
79899a2dd95SBruce Richardson 		ret = -1;
79999a2dd95SBruce Richardson 		goto unlock_exit;
80099a2dd95SBruce Richardson 	}
80199a2dd95SBruce Richardson 
80299a2dd95SBruce Richardson 	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
80399a2dd95SBruce Richardson 
80499a2dd95SBruce Richardson unlock_exit:
80599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
80699a2dd95SBruce Richardson 	return ret;
80799a2dd95SBruce Richardson }
80899a2dd95SBruce Richardson 
80999a2dd95SBruce Richardson static void
vhost_user_socket_mem_free(struct vhost_user_socket * vsocket)81099a2dd95SBruce Richardson vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
81199a2dd95SBruce Richardson {
81299a2dd95SBruce Richardson 	if (vsocket && vsocket->path) {
81399a2dd95SBruce Richardson 		free(vsocket->path);
81499a2dd95SBruce Richardson 		vsocket->path = NULL;
81599a2dd95SBruce Richardson 	}
81699a2dd95SBruce Richardson 
81799a2dd95SBruce Richardson 	if (vsocket) {
81899a2dd95SBruce Richardson 		free(vsocket);
81999a2dd95SBruce Richardson 		vsocket = NULL;
82099a2dd95SBruce Richardson 	}
82199a2dd95SBruce Richardson }
82299a2dd95SBruce Richardson 
82399a2dd95SBruce Richardson /*
82499a2dd95SBruce Richardson  * Register a new vhost-user socket; here we could act as server
82599a2dd95SBruce Richardson  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
82699a2dd95SBruce Richardson  * is set.
82799a2dd95SBruce Richardson  */
82899a2dd95SBruce Richardson int
rte_vhost_driver_register(const char * path,uint64_t flags)82999a2dd95SBruce Richardson rte_vhost_driver_register(const char *path, uint64_t flags)
83099a2dd95SBruce Richardson {
83199a2dd95SBruce Richardson 	int ret = -1;
83299a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
83399a2dd95SBruce Richardson 
83499a2dd95SBruce Richardson 	if (!path)
83599a2dd95SBruce Richardson 		return -1;
83699a2dd95SBruce Richardson 
83799a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
83899a2dd95SBruce Richardson 
83999a2dd95SBruce Richardson 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
840c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) the number of vhost sockets reaches maximum\n",
841c85c35b1SMaxime Coquelin 				path);
84299a2dd95SBruce Richardson 		goto out;
84399a2dd95SBruce Richardson 	}
84499a2dd95SBruce Richardson 
84599a2dd95SBruce Richardson 	vsocket = malloc(sizeof(struct vhost_user_socket));
84699a2dd95SBruce Richardson 	if (!vsocket)
84799a2dd95SBruce Richardson 		goto out;
84899a2dd95SBruce Richardson 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
84999a2dd95SBruce Richardson 	vsocket->path = strdup(path);
85099a2dd95SBruce Richardson 	if (vsocket->path == NULL) {
851c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to copy socket path string\n", path);
85299a2dd95SBruce Richardson 		vhost_user_socket_mem_free(vsocket);
85399a2dd95SBruce Richardson 		goto out;
85499a2dd95SBruce Richardson 	}
85599a2dd95SBruce Richardson 	TAILQ_INIT(&vsocket->conn_list);
85699a2dd95SBruce Richardson 	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
85799a2dd95SBruce Richardson 	if (ret) {
858c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to init connection mutex\n", path);
85999a2dd95SBruce Richardson 		goto out_free;
86099a2dd95SBruce Richardson 	}
86199a2dd95SBruce Richardson 	vsocket->vdpa_dev = NULL;
86299a2dd95SBruce Richardson 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
86399a2dd95SBruce Richardson 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
86499a2dd95SBruce Richardson 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
865ca7036b4SDavid Marchand 	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
86699a2dd95SBruce Richardson 
86799a2dd95SBruce Richardson 	if (vsocket->async_copy &&
86899a2dd95SBruce Richardson 		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
86999a2dd95SBruce Richardson 		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
870*5b030165SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) async copy with IOMMU or post-copy not supported\n",
871*5b030165SMaxime Coquelin 				path);
87299a2dd95SBruce Richardson 		goto out_mutex;
87399a2dd95SBruce Richardson 	}
87499a2dd95SBruce Richardson 
87599a2dd95SBruce Richardson 	/*
87699a2dd95SBruce Richardson 	 * Set the supported features correctly for the builtin vhost-user
87799a2dd95SBruce Richardson 	 * net driver.
87899a2dd95SBruce Richardson 	 *
87999a2dd95SBruce Richardson 	 * Applications know nothing about features the builtin virtio net
88099a2dd95SBruce Richardson 	 * driver (virtio_net.c) supports, thus it's not possible for them
88199a2dd95SBruce Richardson 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
88299a2dd95SBruce Richardson 	 * we set it unconditionally. If the application want to implement
88399a2dd95SBruce Richardson 	 * another vhost-user driver (say SCSI), it should call the
88499a2dd95SBruce Richardson 	 * rte_vhost_driver_set_features(), which will overwrite following
88599a2dd95SBruce Richardson 	 * two values.
88699a2dd95SBruce Richardson 	 */
88799a2dd95SBruce Richardson 	vsocket->use_builtin_virtio_net = true;
88899a2dd95SBruce Richardson 	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
88999a2dd95SBruce Richardson 	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
89099a2dd95SBruce Richardson 	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
89199a2dd95SBruce Richardson 
89299a2dd95SBruce Richardson 	if (vsocket->async_copy) {
89399a2dd95SBruce Richardson 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
89499a2dd95SBruce Richardson 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
895c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(INFO, "(%s) logging feature is disabled in async copy mode\n",
896c85c35b1SMaxime Coquelin 				path);
89799a2dd95SBruce Richardson 	}
89899a2dd95SBruce Richardson 
89999a2dd95SBruce Richardson 	/*
90099a2dd95SBruce Richardson 	 * We'll not be able to receive a buffer from guest in linear mode
90199a2dd95SBruce Richardson 	 * without external buffer if it will not fit in a single mbuf, which is
90299a2dd95SBruce Richardson 	 * likely if segmentation offloading enabled.
90399a2dd95SBruce Richardson 	 */
90499a2dd95SBruce Richardson 	if (vsocket->linearbuf && !vsocket->extbuf) {
90599a2dd95SBruce Richardson 		uint64_t seg_offload_features =
90699a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
90799a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
90899a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_UFO);
90999a2dd95SBruce Richardson 
910*5b030165SMaxime Coquelin 		VHOST_LOG_CONFIG(INFO, "(%s) Linear buffers requested without external buffers,\n",
911*5b030165SMaxime Coquelin 				path);
912*5b030165SMaxime Coquelin 		VHOST_LOG_CONFIG(INFO, "(%s) disabling host segmentation offloading support\n",
913*5b030165SMaxime Coquelin 				path);
91499a2dd95SBruce Richardson 		vsocket->supported_features &= ~seg_offload_features;
91599a2dd95SBruce Richardson 		vsocket->features &= ~seg_offload_features;
91699a2dd95SBruce Richardson 	}
91799a2dd95SBruce Richardson 
91899a2dd95SBruce Richardson 	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
91999a2dd95SBruce Richardson 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
92099a2dd95SBruce Richardson 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
92199a2dd95SBruce Richardson 	}
92299a2dd95SBruce Richardson 
92399a2dd95SBruce Richardson 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
92499a2dd95SBruce Richardson 		vsocket->protocol_features &=
92599a2dd95SBruce Richardson 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
92699a2dd95SBruce Richardson 	} else {
92799a2dd95SBruce Richardson #ifndef RTE_LIBRTE_VHOST_POSTCOPY
928c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) Postcopy requested but not compiled\n", path);
92999a2dd95SBruce Richardson 		ret = -1;
93099a2dd95SBruce Richardson 		goto out_mutex;
93199a2dd95SBruce Richardson #endif
93299a2dd95SBruce Richardson 	}
93399a2dd95SBruce Richardson 
93499a2dd95SBruce Richardson 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
93599a2dd95SBruce Richardson 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
93699a2dd95SBruce Richardson 		if (vsocket->reconnect && reconn_tid == 0) {
93799a2dd95SBruce Richardson 			if (vhost_user_reconnect_init() != 0)
93899a2dd95SBruce Richardson 				goto out_mutex;
93999a2dd95SBruce Richardson 		}
94099a2dd95SBruce Richardson 	} else {
94199a2dd95SBruce Richardson 		vsocket->is_server = true;
94299a2dd95SBruce Richardson 	}
94399a2dd95SBruce Richardson 	ret = create_unix_socket(vsocket);
94499a2dd95SBruce Richardson 	if (ret < 0) {
94599a2dd95SBruce Richardson 		goto out_mutex;
94699a2dd95SBruce Richardson 	}
94799a2dd95SBruce Richardson 
94899a2dd95SBruce Richardson 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
94999a2dd95SBruce Richardson 
95099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
95199a2dd95SBruce Richardson 	return ret;
95299a2dd95SBruce Richardson 
95399a2dd95SBruce Richardson out_mutex:
95499a2dd95SBruce Richardson 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
955c85c35b1SMaxime Coquelin 		VHOST_LOG_CONFIG(ERR, "(%s) failed to destroy connection mutex\n", path);
95699a2dd95SBruce Richardson 	}
95799a2dd95SBruce Richardson out_free:
95899a2dd95SBruce Richardson 	vhost_user_socket_mem_free(vsocket);
95999a2dd95SBruce Richardson out:
96099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
96199a2dd95SBruce Richardson 
96299a2dd95SBruce Richardson 	return ret;
96399a2dd95SBruce Richardson }
96499a2dd95SBruce Richardson 
96599a2dd95SBruce Richardson static bool
vhost_user_remove_reconnect(struct vhost_user_socket * vsocket)96699a2dd95SBruce Richardson vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
96799a2dd95SBruce Richardson {
96899a2dd95SBruce Richardson 	int found = false;
96999a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn, *next;
97099a2dd95SBruce Richardson 
97199a2dd95SBruce Richardson 	pthread_mutex_lock(&reconn_list.mutex);
97299a2dd95SBruce Richardson 
97399a2dd95SBruce Richardson 	for (reconn = TAILQ_FIRST(&reconn_list.head);
97499a2dd95SBruce Richardson 	     reconn != NULL; reconn = next) {
97599a2dd95SBruce Richardson 		next = TAILQ_NEXT(reconn, next);
97699a2dd95SBruce Richardson 
97799a2dd95SBruce Richardson 		if (reconn->vsocket == vsocket) {
97899a2dd95SBruce Richardson 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
97999a2dd95SBruce Richardson 			close(reconn->fd);
98099a2dd95SBruce Richardson 			free(reconn);
98199a2dd95SBruce Richardson 			found = true;
98299a2dd95SBruce Richardson 			break;
98399a2dd95SBruce Richardson 		}
98499a2dd95SBruce Richardson 	}
98599a2dd95SBruce Richardson 	pthread_mutex_unlock(&reconn_list.mutex);
98699a2dd95SBruce Richardson 	return found;
98799a2dd95SBruce Richardson }
98899a2dd95SBruce Richardson 
98999a2dd95SBruce Richardson /**
99099a2dd95SBruce Richardson  * Unregister the specified vhost socket
99199a2dd95SBruce Richardson  */
99299a2dd95SBruce Richardson int
rte_vhost_driver_unregister(const char * path)99399a2dd95SBruce Richardson rte_vhost_driver_unregister(const char *path)
99499a2dd95SBruce Richardson {
99599a2dd95SBruce Richardson 	int i;
99699a2dd95SBruce Richardson 	int count;
99799a2dd95SBruce Richardson 	struct vhost_user_connection *conn, *next;
99899a2dd95SBruce Richardson 
99999a2dd95SBruce Richardson 	if (path == NULL)
100099a2dd95SBruce Richardson 		return -1;
100199a2dd95SBruce Richardson 
100299a2dd95SBruce Richardson again:
100399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
100499a2dd95SBruce Richardson 
100599a2dd95SBruce Richardson 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
100699a2dd95SBruce Richardson 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
1007451dc0faSGaoxiang Liu 		if (strcmp(vsocket->path, path))
1008451dc0faSGaoxiang Liu 			continue;
100999a2dd95SBruce Richardson 
1010451dc0faSGaoxiang Liu 		if (vsocket->is_server) {
1011451dc0faSGaoxiang Liu 			/*
1012451dc0faSGaoxiang Liu 			 * If r/wcb is executing, release vhost_user's
1013451dc0faSGaoxiang Liu 			 * mutex lock, and try again since the r/wcb
1014451dc0faSGaoxiang Liu 			 * may use the mutex lock.
1015451dc0faSGaoxiang Liu 			 */
1016451dc0faSGaoxiang Liu 			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
1017451dc0faSGaoxiang Liu 				pthread_mutex_unlock(&vhost_user.mutex);
1018451dc0faSGaoxiang Liu 				goto again;
1019451dc0faSGaoxiang Liu 			}
1020451dc0faSGaoxiang Liu 		} else if (vsocket->reconnect) {
1021451dc0faSGaoxiang Liu 			vhost_user_remove_reconnect(vsocket);
1022451dc0faSGaoxiang Liu 		}
1023451dc0faSGaoxiang Liu 
102499a2dd95SBruce Richardson 		pthread_mutex_lock(&vsocket->conn_mutex);
102599a2dd95SBruce Richardson 		for (conn = TAILQ_FIRST(&vsocket->conn_list);
102699a2dd95SBruce Richardson 			 conn != NULL;
102799a2dd95SBruce Richardson 			 conn = next) {
102899a2dd95SBruce Richardson 			next = TAILQ_NEXT(conn, next);
102999a2dd95SBruce Richardson 
103099a2dd95SBruce Richardson 			/*
103199a2dd95SBruce Richardson 			 * If r/wcb is executing, release vsocket's
103299a2dd95SBruce Richardson 			 * conn_mutex and vhost_user's mutex locks, and
103399a2dd95SBruce Richardson 			 * try again since the r/wcb may use the
103499a2dd95SBruce Richardson 			 * conn_mutex and mutex locks.
103599a2dd95SBruce Richardson 			 */
103699a2dd95SBruce Richardson 			if (fdset_try_del(&vhost_user.fdset,
103799a2dd95SBruce Richardson 					  conn->connfd) == -1) {
1038451dc0faSGaoxiang Liu 				pthread_mutex_unlock(&vsocket->conn_mutex);
103999a2dd95SBruce Richardson 				pthread_mutex_unlock(&vhost_user.mutex);
104099a2dd95SBruce Richardson 				goto again;
104199a2dd95SBruce Richardson 			}
104299a2dd95SBruce Richardson 
1043c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(INFO, "(%s) free connfd %d\n", path, conn->connfd);
104499a2dd95SBruce Richardson 			close(conn->connfd);
104599a2dd95SBruce Richardson 			vhost_destroy_device(conn->vid);
104699a2dd95SBruce Richardson 			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
104799a2dd95SBruce Richardson 			free(conn);
104899a2dd95SBruce Richardson 		}
104999a2dd95SBruce Richardson 		pthread_mutex_unlock(&vsocket->conn_mutex);
105099a2dd95SBruce Richardson 
105199a2dd95SBruce Richardson 		if (vsocket->is_server) {
105299a2dd95SBruce Richardson 			close(vsocket->socket_fd);
105399a2dd95SBruce Richardson 			unlink(path);
105499a2dd95SBruce Richardson 		}
105599a2dd95SBruce Richardson 
105699a2dd95SBruce Richardson 		pthread_mutex_destroy(&vsocket->conn_mutex);
105799a2dd95SBruce Richardson 		vhost_user_socket_mem_free(vsocket);
105899a2dd95SBruce Richardson 
105999a2dd95SBruce Richardson 		count = --vhost_user.vsocket_cnt;
106099a2dd95SBruce Richardson 		vhost_user.vsockets[i] = vhost_user.vsockets[count];
106199a2dd95SBruce Richardson 		vhost_user.vsockets[count] = NULL;
106299a2dd95SBruce Richardson 		pthread_mutex_unlock(&vhost_user.mutex);
106399a2dd95SBruce Richardson 		return 0;
106499a2dd95SBruce Richardson 	}
106599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
106699a2dd95SBruce Richardson 
106799a2dd95SBruce Richardson 	return -1;
106899a2dd95SBruce Richardson }
106999a2dd95SBruce Richardson 
107099a2dd95SBruce Richardson /*
107199a2dd95SBruce Richardson  * Register ops so that we can add/remove device to data core.
107299a2dd95SBruce Richardson  */
107399a2dd95SBruce Richardson int
rte_vhost_driver_callback_register(const char * path,struct rte_vhost_device_ops const * const ops)107499a2dd95SBruce Richardson rte_vhost_driver_callback_register(const char *path,
1075ab4bb424SMaxime Coquelin 	struct rte_vhost_device_ops const * const ops)
107699a2dd95SBruce Richardson {
107799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
107899a2dd95SBruce Richardson 
107999a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
108099a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
108199a2dd95SBruce Richardson 	if (vsocket)
108299a2dd95SBruce Richardson 		vsocket->notify_ops = ops;
108399a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
108499a2dd95SBruce Richardson 
108599a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
108699a2dd95SBruce Richardson }
108799a2dd95SBruce Richardson 
1088ab4bb424SMaxime Coquelin struct rte_vhost_device_ops const *
vhost_driver_callback_get(const char * path)108999a2dd95SBruce Richardson vhost_driver_callback_get(const char *path)
109099a2dd95SBruce Richardson {
109199a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
109299a2dd95SBruce Richardson 
109399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
109499a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
109599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
109699a2dd95SBruce Richardson 
109799a2dd95SBruce Richardson 	return vsocket ? vsocket->notify_ops : NULL;
109899a2dd95SBruce Richardson }
109999a2dd95SBruce Richardson 
110099a2dd95SBruce Richardson int
rte_vhost_driver_start(const char * path)110199a2dd95SBruce Richardson rte_vhost_driver_start(const char *path)
110299a2dd95SBruce Richardson {
110399a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
110499a2dd95SBruce Richardson 	static pthread_t fdset_tid;
110599a2dd95SBruce Richardson 
110699a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
110799a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
110899a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
110999a2dd95SBruce Richardson 
111099a2dd95SBruce Richardson 	if (!vsocket)
111199a2dd95SBruce Richardson 		return -1;
111299a2dd95SBruce Richardson 
111399a2dd95SBruce Richardson 	if (fdset_tid == 0) {
111499a2dd95SBruce Richardson 		/**
111599a2dd95SBruce Richardson 		 * create a pipe which will be waited by poll and notified to
111699a2dd95SBruce Richardson 		 * rebuild the wait list of poll.
111799a2dd95SBruce Richardson 		 */
111899a2dd95SBruce Richardson 		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1119c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(ERR, "(%s) failed to create pipe for vhost fdset\n", path);
112099a2dd95SBruce Richardson 			return -1;
112199a2dd95SBruce Richardson 		}
112299a2dd95SBruce Richardson 
112399a2dd95SBruce Richardson 		int ret = rte_ctrl_thread_create(&fdset_tid,
112499a2dd95SBruce Richardson 			"vhost-events", NULL, fdset_event_dispatch,
112599a2dd95SBruce Richardson 			&vhost_user.fdset);
112699a2dd95SBruce Richardson 		if (ret != 0) {
1127c85c35b1SMaxime Coquelin 			VHOST_LOG_CONFIG(ERR, "(%s) failed to create fdset handling thread", path);
112899a2dd95SBruce Richardson 
112999a2dd95SBruce Richardson 			fdset_pipe_uninit(&vhost_user.fdset);
113099a2dd95SBruce Richardson 			return -1;
113199a2dd95SBruce Richardson 		}
113299a2dd95SBruce Richardson 	}
113399a2dd95SBruce Richardson 
113499a2dd95SBruce Richardson 	if (vsocket->is_server)
113599a2dd95SBruce Richardson 		return vhost_user_start_server(vsocket);
113699a2dd95SBruce Richardson 	else
113799a2dd95SBruce Richardson 		return vhost_user_start_client(vsocket);
113899a2dd95SBruce Richardson }
1139