/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	/* Established connections on this socket; guarded by conn_mutex. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	/* Heap-allocated copy of the unix socket path (strdup'd). */
	char *path;
	/* Listen fd (server mode) or connect fd (client mode). */
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	/* Client mode only: retry the connection in a background thread. */
	bool reconnect;
	bool dequeue_zero_copy;
	bool iommu_support;
	/* True while the built-in virtio-net backend is used; cleared once
	 * the application sets its own features via
	 * rte_vhost_driver_set_features().
	 */
	bool use_builtin_virtio_net;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/*
	 * Device id to identify a specific backend device.
	 * It's set to -1 for the default software implementation.
	 * If valid, one socket can have 1 connection only.
	 */
	int vdpa_dev_id;

	/* Application callbacks (new_device, destroy_device, ...). */
	struct vhost_device_ops const *notify_ops;
};

/* One live vhost-user connection: ties a connected fd to a vhost device id. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of all registered sockets; guarded by 'mutex'. */
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

/* The singleton registry shared by all APIs in this file. */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * return bytes# of read on success or negative val on failure. Update fdnum
 * with number of fds read.
103 */ 104 int 105 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, 106 int *fd_num) 107 { 108 struct iovec iov; 109 struct msghdr msgh; 110 char control[CMSG_SPACE(max_fds * sizeof(int))]; 111 struct cmsghdr *cmsg; 112 int got_fds = 0; 113 int ret; 114 115 *fd_num = 0; 116 117 memset(&msgh, 0, sizeof(msgh)); 118 iov.iov_base = buf; 119 iov.iov_len = buflen; 120 121 msgh.msg_iov = &iov; 122 msgh.msg_iovlen = 1; 123 msgh.msg_control = control; 124 msgh.msg_controllen = sizeof(control); 125 126 ret = recvmsg(sockfd, &msgh, 0); 127 if (ret <= 0) { 128 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); 129 return ret; 130 } 131 132 if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { 133 RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); 134 return -1; 135 } 136 137 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 138 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 139 if ((cmsg->cmsg_level == SOL_SOCKET) && 140 (cmsg->cmsg_type == SCM_RIGHTS)) { 141 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 142 *fd_num = got_fds; 143 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); 144 break; 145 } 146 } 147 148 /* Clear out unused file descriptors */ 149 while (got_fds < max_fds) 150 fds[got_fds++] = -1; 151 152 return ret; 153 } 154 155 int 156 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) 157 { 158 159 struct iovec iov; 160 struct msghdr msgh; 161 size_t fdsize = fd_num * sizeof(int); 162 char control[CMSG_SPACE(fdsize)]; 163 struct cmsghdr *cmsg; 164 int ret; 165 166 memset(&msgh, 0, sizeof(msgh)); 167 iov.iov_base = buf; 168 iov.iov_len = buflen; 169 170 msgh.msg_iov = &iov; 171 msgh.msg_iovlen = 1; 172 173 if (fds && fd_num > 0) { 174 msgh.msg_control = control; 175 msgh.msg_controllen = sizeof(control); 176 cmsg = CMSG_FIRSTHDR(&msgh); 177 if (cmsg == NULL) { 178 RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); 179 errno = EINVAL; 180 return -1; 181 } 182 cmsg->cmsg_len = CMSG_LEN(fdsize); 183 cmsg->cmsg_level = SOL_SOCKET; 184 
cmsg->cmsg_type = SCM_RIGHTS; 185 memcpy(CMSG_DATA(cmsg), fds, fdsize); 186 } else { 187 msgh.msg_control = NULL; 188 msgh.msg_controllen = 0; 189 } 190 191 do { 192 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); 193 } while (ret < 0 && errno == EINTR); 194 195 if (ret < 0) { 196 RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); 197 return ret; 198 } 199 200 return ret; 201 } 202 203 static void 204 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) 205 { 206 int vid; 207 size_t size; 208 struct vhost_user_connection *conn; 209 int ret; 210 211 if (vsocket == NULL) 212 return; 213 214 conn = malloc(sizeof(*conn)); 215 if (conn == NULL) { 216 close(fd); 217 return; 218 } 219 220 vid = vhost_new_device(); 221 if (vid == -1) { 222 goto err; 223 } 224 225 size = strnlen(vsocket->path, PATH_MAX); 226 vhost_set_ifname(vid, vsocket->path, size); 227 228 vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); 229 230 vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id); 231 232 if (vsocket->dequeue_zero_copy) 233 vhost_enable_dequeue_zero_copy(vid); 234 235 RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); 236 237 if (vsocket->notify_ops->new_connection) { 238 ret = vsocket->notify_ops->new_connection(vid); 239 if (ret < 0) { 240 RTE_LOG(ERR, VHOST_CONFIG, 241 "failed to add vhost user connection with fd %d\n", 242 fd); 243 goto err_cleanup; 244 } 245 } 246 247 conn->connfd = fd; 248 conn->vsocket = vsocket; 249 conn->vid = vid; 250 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, 251 NULL, conn); 252 if (ret < 0) { 253 RTE_LOG(ERR, VHOST_CONFIG, 254 "failed to add fd %d into vhost server fdset\n", 255 fd); 256 257 if (vsocket->notify_ops->destroy_connection) 258 vsocket->notify_ops->destroy_connection(conn->vid); 259 260 goto err_cleanup; 261 } 262 263 pthread_mutex_lock(&vsocket->conn_mutex); 264 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); 265 pthread_mutex_unlock(&vsocket->conn_mutex); 266 267 
fdset_pipe_notify(&vhost_user.fdset); 268 return; 269 270 err_cleanup: 271 vhost_destroy_device(vid); 272 err: 273 free(conn); 274 close(fd); 275 } 276 277 /* call back when there is new vhost-user connection from client */ 278 static void 279 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) 280 { 281 struct vhost_user_socket *vsocket = dat; 282 283 fd = accept(fd, NULL, NULL); 284 if (fd < 0) 285 return; 286 287 RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); 288 vhost_user_add_connection(fd, vsocket); 289 } 290 291 static void 292 vhost_user_read_cb(int connfd, void *dat, int *remove) 293 { 294 struct vhost_user_connection *conn = dat; 295 struct vhost_user_socket *vsocket = conn->vsocket; 296 int ret; 297 298 ret = vhost_user_msg_handler(conn->vid, connfd); 299 if (ret < 0) { 300 struct virtio_net *dev = get_device(conn->vid); 301 302 close(connfd); 303 *remove = 1; 304 305 if (dev) 306 vhost_destroy_device_notify(dev); 307 308 if (vsocket->notify_ops->destroy_connection) 309 vsocket->notify_ops->destroy_connection(conn->vid); 310 311 vhost_destroy_device(conn->vid); 312 313 pthread_mutex_lock(&vsocket->conn_mutex); 314 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 315 pthread_mutex_unlock(&vsocket->conn_mutex); 316 317 free(conn); 318 319 if (vsocket->reconnect) { 320 create_unix_socket(vsocket); 321 vhost_user_start_client(vsocket); 322 } 323 } 324 } 325 326 static int 327 create_unix_socket(struct vhost_user_socket *vsocket) 328 { 329 int fd; 330 struct sockaddr_un *un = &vsocket->un; 331 332 fd = socket(AF_UNIX, SOCK_STREAM, 0); 333 if (fd < 0) 334 return -1; 335 RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", 336 vsocket->is_server ? 
"server" : "client", fd); 337 338 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { 339 RTE_LOG(ERR, VHOST_CONFIG, 340 "vhost-user: can't set nonblocking mode for socket, fd: " 341 "%d (%s)\n", fd, strerror(errno)); 342 close(fd); 343 return -1; 344 } 345 346 memset(un, 0, sizeof(*un)); 347 un->sun_family = AF_UNIX; 348 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); 349 un->sun_path[sizeof(un->sun_path) - 1] = '\0'; 350 351 vsocket->socket_fd = fd; 352 return 0; 353 } 354 355 static int 356 vhost_user_start_server(struct vhost_user_socket *vsocket) 357 { 358 int ret; 359 int fd = vsocket->socket_fd; 360 const char *path = vsocket->path; 361 362 /* 363 * bind () may fail if the socket file with the same name already 364 * exists. But the library obviously should not delete the file 365 * provided by the user, since we can not be sure that it is not 366 * being used by other applications. Moreover, many applications form 367 * socket names based on user input, which is prone to errors. 368 * 369 * The user must ensure that the socket does not exist before 370 * registering the vhost driver in server mode. 
371 */ 372 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); 373 if (ret < 0) { 374 RTE_LOG(ERR, VHOST_CONFIG, 375 "failed to bind to %s: %s; remove it and try again\n", 376 path, strerror(errno)); 377 goto err; 378 } 379 RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); 380 381 ret = listen(fd, MAX_VIRTIO_BACKLOG); 382 if (ret < 0) 383 goto err; 384 385 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, 386 NULL, vsocket); 387 if (ret < 0) { 388 RTE_LOG(ERR, VHOST_CONFIG, 389 "failed to add listen fd %d to vhost server fdset\n", 390 fd); 391 goto err; 392 } 393 394 return 0; 395 396 err: 397 close(fd); 398 return -1; 399 } 400 401 struct vhost_user_reconnect { 402 struct sockaddr_un un; 403 int fd; 404 struct vhost_user_socket *vsocket; 405 406 TAILQ_ENTRY(vhost_user_reconnect) next; 407 }; 408 409 TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); 410 struct vhost_user_reconnect_list { 411 struct vhost_user_reconnect_tailq_list head; 412 pthread_mutex_t mutex; 413 }; 414 415 static struct vhost_user_reconnect_list reconn_list; 416 static pthread_t reconn_tid; 417 418 static int 419 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) 420 { 421 int ret, flags; 422 423 ret = connect(fd, un, sz); 424 if (ret < 0 && errno != EISCONN) 425 return -1; 426 427 flags = fcntl(fd, F_GETFL, 0); 428 if (flags < 0) { 429 RTE_LOG(ERR, VHOST_CONFIG, 430 "can't get flags for connfd %d\n", fd); 431 return -2; 432 } 433 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { 434 RTE_LOG(ERR, VHOST_CONFIG, 435 "can't disable nonblocking on fd %d\n", fd); 436 return -2; 437 } 438 return 0; 439 } 440 441 static void * 442 vhost_user_client_reconnect(void *arg __rte_unused) 443 { 444 int ret; 445 struct vhost_user_reconnect *reconn, *next; 446 447 while (1) { 448 pthread_mutex_lock(&reconn_list.mutex); 449 450 /* 451 * An equal implementation of TAILQ_FOREACH_SAFE, 452 * which does not exist on all 
platforms. 453 */ 454 for (reconn = TAILQ_FIRST(&reconn_list.head); 455 reconn != NULL; reconn = next) { 456 next = TAILQ_NEXT(reconn, next); 457 458 ret = vhost_user_connect_nonblock(reconn->fd, 459 (struct sockaddr *)&reconn->un, 460 sizeof(reconn->un)); 461 if (ret == -2) { 462 close(reconn->fd); 463 RTE_LOG(ERR, VHOST_CONFIG, 464 "reconnection for fd %d failed\n", 465 reconn->fd); 466 goto remove_fd; 467 } 468 if (ret == -1) 469 continue; 470 471 RTE_LOG(INFO, VHOST_CONFIG, 472 "%s: connected\n", reconn->vsocket->path); 473 vhost_user_add_connection(reconn->fd, reconn->vsocket); 474 remove_fd: 475 TAILQ_REMOVE(&reconn_list.head, reconn, next); 476 free(reconn); 477 } 478 479 pthread_mutex_unlock(&reconn_list.mutex); 480 sleep(1); 481 } 482 483 return NULL; 484 } 485 486 static int 487 vhost_user_reconnect_init(void) 488 { 489 int ret; 490 491 ret = pthread_mutex_init(&reconn_list.mutex, NULL); 492 if (ret < 0) { 493 RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex"); 494 return ret; 495 } 496 TAILQ_INIT(&reconn_list.head); 497 498 ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, 499 vhost_user_client_reconnect, NULL); 500 if (ret != 0) { 501 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); 502 if (pthread_mutex_destroy(&reconn_list.mutex)) { 503 RTE_LOG(ERR, VHOST_CONFIG, 504 "failed to destroy reconnect mutex"); 505 } 506 } 507 508 return ret; 509 } 510 511 static int 512 vhost_user_start_client(struct vhost_user_socket *vsocket) 513 { 514 int ret; 515 int fd = vsocket->socket_fd; 516 const char *path = vsocket->path; 517 struct vhost_user_reconnect *reconn; 518 519 ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un, 520 sizeof(vsocket->un)); 521 if (ret == 0) { 522 vhost_user_add_connection(fd, vsocket); 523 return 0; 524 } 525 526 RTE_LOG(WARNING, VHOST_CONFIG, 527 "failed to connect to %s: %s\n", 528 path, strerror(errno)); 529 530 if (ret == -2 || !vsocket->reconnect) { 531 close(fd); 532 return -1; 
533 } 534 535 RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); 536 reconn = malloc(sizeof(*reconn)); 537 if (reconn == NULL) { 538 RTE_LOG(ERR, VHOST_CONFIG, 539 "failed to allocate memory for reconnect\n"); 540 close(fd); 541 return -1; 542 } 543 reconn->un = vsocket->un; 544 reconn->fd = fd; 545 reconn->vsocket = vsocket; 546 pthread_mutex_lock(&reconn_list.mutex); 547 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); 548 pthread_mutex_unlock(&reconn_list.mutex); 549 550 return 0; 551 } 552 553 static struct vhost_user_socket * 554 find_vhost_user_socket(const char *path) 555 { 556 int i; 557 558 if (path == NULL) 559 return NULL; 560 561 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 562 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 563 564 if (!strcmp(vsocket->path, path)) 565 return vsocket; 566 } 567 568 return NULL; 569 } 570 571 int 572 rte_vhost_driver_attach_vdpa_device(const char *path, int did) 573 { 574 struct vhost_user_socket *vsocket; 575 576 if (rte_vdpa_get_device(did) == NULL || path == NULL) 577 return -1; 578 579 pthread_mutex_lock(&vhost_user.mutex); 580 vsocket = find_vhost_user_socket(path); 581 if (vsocket) 582 vsocket->vdpa_dev_id = did; 583 pthread_mutex_unlock(&vhost_user.mutex); 584 585 return vsocket ? 0 : -1; 586 } 587 588 int 589 rte_vhost_driver_detach_vdpa_device(const char *path) 590 { 591 struct vhost_user_socket *vsocket; 592 593 pthread_mutex_lock(&vhost_user.mutex); 594 vsocket = find_vhost_user_socket(path); 595 if (vsocket) 596 vsocket->vdpa_dev_id = -1; 597 pthread_mutex_unlock(&vhost_user.mutex); 598 599 return vsocket ? 
0 : -1; 600 } 601 602 int 603 rte_vhost_driver_get_vdpa_device_id(const char *path) 604 { 605 struct vhost_user_socket *vsocket; 606 int did = -1; 607 608 pthread_mutex_lock(&vhost_user.mutex); 609 vsocket = find_vhost_user_socket(path); 610 if (vsocket) 611 did = vsocket->vdpa_dev_id; 612 pthread_mutex_unlock(&vhost_user.mutex); 613 614 return did; 615 } 616 617 int 618 rte_vhost_driver_disable_features(const char *path, uint64_t features) 619 { 620 struct vhost_user_socket *vsocket; 621 622 pthread_mutex_lock(&vhost_user.mutex); 623 vsocket = find_vhost_user_socket(path); 624 625 /* Note that use_builtin_virtio_net is not affected by this function 626 * since callers may want to selectively disable features of the 627 * built-in vhost net device backend. 628 */ 629 630 if (vsocket) 631 vsocket->features &= ~features; 632 pthread_mutex_unlock(&vhost_user.mutex); 633 634 return vsocket ? 0 : -1; 635 } 636 637 int 638 rte_vhost_driver_enable_features(const char *path, uint64_t features) 639 { 640 struct vhost_user_socket *vsocket; 641 642 pthread_mutex_lock(&vhost_user.mutex); 643 vsocket = find_vhost_user_socket(path); 644 if (vsocket) { 645 if ((vsocket->supported_features & features) != features) { 646 /* 647 * trying to enable features the driver doesn't 648 * support. 649 */ 650 pthread_mutex_unlock(&vhost_user.mutex); 651 return -1; 652 } 653 vsocket->features |= features; 654 } 655 pthread_mutex_unlock(&vhost_user.mutex); 656 657 return vsocket ? 0 : -1; 658 } 659 660 int 661 rte_vhost_driver_set_features(const char *path, uint64_t features) 662 { 663 struct vhost_user_socket *vsocket; 664 665 pthread_mutex_lock(&vhost_user.mutex); 666 vsocket = find_vhost_user_socket(path); 667 if (vsocket) { 668 vsocket->supported_features = features; 669 vsocket->features = features; 670 671 /* Anyone setting feature bits is implementing their own vhost 672 * device backend. 
673 */ 674 vsocket->use_builtin_virtio_net = false; 675 } 676 pthread_mutex_unlock(&vhost_user.mutex); 677 678 return vsocket ? 0 : -1; 679 } 680 681 int 682 rte_vhost_driver_get_features(const char *path, uint64_t *features) 683 { 684 struct vhost_user_socket *vsocket; 685 uint64_t vdpa_features; 686 struct rte_vdpa_device *vdpa_dev; 687 int did = -1; 688 int ret = 0; 689 690 pthread_mutex_lock(&vhost_user.mutex); 691 vsocket = find_vhost_user_socket(path); 692 if (!vsocket) { 693 RTE_LOG(ERR, VHOST_CONFIG, 694 "socket file %s is not registered yet.\n", path); 695 ret = -1; 696 goto unlock_exit; 697 } 698 699 did = vsocket->vdpa_dev_id; 700 vdpa_dev = rte_vdpa_get_device(did); 701 if (!vdpa_dev || !vdpa_dev->ops->get_features) { 702 *features = vsocket->features; 703 goto unlock_exit; 704 } 705 706 if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) { 707 RTE_LOG(ERR, VHOST_CONFIG, 708 "failed to get vdpa features " 709 "for socket file %s.\n", path); 710 ret = -1; 711 goto unlock_exit; 712 } 713 714 *features = vsocket->features & vdpa_features; 715 716 unlock_exit: 717 pthread_mutex_unlock(&vhost_user.mutex); 718 return ret; 719 } 720 721 int 722 rte_vhost_driver_get_protocol_features(const char *path, 723 uint64_t *protocol_features) 724 { 725 struct vhost_user_socket *vsocket; 726 uint64_t vdpa_protocol_features; 727 struct rte_vdpa_device *vdpa_dev; 728 int did = -1; 729 int ret = 0; 730 731 pthread_mutex_lock(&vhost_user.mutex); 732 vsocket = find_vhost_user_socket(path); 733 if (!vsocket) { 734 RTE_LOG(ERR, VHOST_CONFIG, 735 "socket file %s is not registered yet.\n", path); 736 ret = -1; 737 goto unlock_exit; 738 } 739 740 did = vsocket->vdpa_dev_id; 741 vdpa_dev = rte_vdpa_get_device(did); 742 if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { 743 *protocol_features = vsocket->protocol_features; 744 goto unlock_exit; 745 } 746 747 if (vdpa_dev->ops->get_protocol_features(did, 748 &vdpa_protocol_features) < 0) { 749 RTE_LOG(ERR, VHOST_CONFIG, 
750 "failed to get vdpa protocol features " 751 "for socket file %s.\n", path); 752 ret = -1; 753 goto unlock_exit; 754 } 755 756 *protocol_features = vsocket->protocol_features 757 & vdpa_protocol_features; 758 759 unlock_exit: 760 pthread_mutex_unlock(&vhost_user.mutex); 761 return ret; 762 } 763 764 int 765 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) 766 { 767 struct vhost_user_socket *vsocket; 768 uint32_t vdpa_queue_num; 769 struct rte_vdpa_device *vdpa_dev; 770 int did = -1; 771 int ret = 0; 772 773 pthread_mutex_lock(&vhost_user.mutex); 774 vsocket = find_vhost_user_socket(path); 775 if (!vsocket) { 776 RTE_LOG(ERR, VHOST_CONFIG, 777 "socket file %s is not registered yet.\n", path); 778 ret = -1; 779 goto unlock_exit; 780 } 781 782 did = vsocket->vdpa_dev_id; 783 vdpa_dev = rte_vdpa_get_device(did); 784 if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) { 785 *queue_num = VHOST_MAX_QUEUE_PAIRS; 786 goto unlock_exit; 787 } 788 789 if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) { 790 RTE_LOG(ERR, VHOST_CONFIG, 791 "failed to get vdpa queue number " 792 "for socket file %s.\n", path); 793 ret = -1; 794 goto unlock_exit; 795 } 796 797 *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num); 798 799 unlock_exit: 800 pthread_mutex_unlock(&vhost_user.mutex); 801 return ret; 802 } 803 804 static void 805 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) 806 { 807 if (vsocket && vsocket->path) { 808 free(vsocket->path); 809 vsocket->path = NULL; 810 } 811 812 if (vsocket) { 813 free(vsocket); 814 vsocket = NULL; 815 } 816 } 817 818 /* 819 * Register a new vhost-user socket; here we could act as server 820 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag 821 * is set. 
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
	 * we set it unconditionally. If the application want to implement
	 * another vhost-user driver (say SCSI), it should call the
	 * rte_vhost_driver_set_features(), which will overwrite following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;

	/*
	 * Dequeue zero copy can't assure descriptors returned in order.
	 * Also, it requires that the guest memory is populated, which is
	 * not compatible with postcopy.
	 */
	if (vsocket->dequeue_zero_copy) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
		vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);

		RTE_LOG(INFO, VHOST_CONFIG,
			"Dequeue zero copy requested, disabling postcopy support\n");
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		/* Postcopy advertised but the library was built without it. */
		RTE_LOG(ERR, VHOST_CONFIG,
			"Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		/* Spawn the reconnect thread once, on first reconnecting
		 * client registration (reconn_tid == 0 means not started;
		 * NOTE(review): comparing pthread_t to 0 is non-portable —
		 * works on Linux).
		 */
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0) {
		goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}

/*
 * Drop any pending reconnect entry for this socket, closing its fd.
 * Returns true if an entry was found and removed.
 */
static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	int found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
	     reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If r/wcb is executing, release the
				 * conn_mutex lock, and try again since
				 * the r/wcb may use the conn_mutex lock.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					pthread_mutex_unlock(&vhost_user.mutex);
					goto again;
				}

				RTE_LOG(INFO, VHOST_CONFIG,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				fdset_del(&vhost_user.fdset,
						vsocket->socket_fd);
				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			/* Compact the registry: move the last slot into
			 * the freed position (order is not preserved).
			 */
			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove device to data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Return the ops registered for 'path', or NULL if the socket is unknown. */
struct vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

/*
 * Start serving (server mode) or connecting (client mode) the registered
 * socket at 'path'. Lazily spawns the single fdset dispatch thread on the
 * first call. Returns 0 on success, -1 on failure.
 */
int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/**
		 * create a pipe which will be waited by poll and notified to
		 * rebuild the wait list of poll.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create fdset handling thread");

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}