1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdio.h> 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 #include <string.h> 11 #include <sys/types.h> 12 #include <sys/socket.h> 13 #include <sys/un.h> 14 #include <sys/queue.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <pthread.h> 18 19 #include <rte_log.h> 20 21 #include "fd_man.h" 22 #include "vhost.h" 23 #include "vhost_user.h" 24 25 26 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); 27 28 /* 29 * Every time rte_vhost_driver_register() is invoked, an associated 30 * vhost_user_socket struct will be created. 31 */ 32 struct vhost_user_socket { 33 struct vhost_user_connection_list conn_list; 34 pthread_mutex_t conn_mutex; 35 char *path; 36 int socket_fd; 37 struct sockaddr_un un; 38 bool is_server; 39 bool reconnect; 40 bool dequeue_zero_copy; 41 bool iommu_support; 42 bool use_builtin_virtio_net; 43 bool extbuf; 44 bool linearbuf; 45 46 /* 47 * The "supported_features" indicates the feature bits the 48 * vhost driver supports. The "features" indicates the feature 49 * bits after the rte_vhost_driver_features_disable/enable(). 50 * It is also the final feature bits used for vhost-user 51 * features negotiation. 52 */ 53 uint64_t supported_features; 54 uint64_t features; 55 56 uint64_t protocol_features; 57 58 /* 59 * Device id to identify a specific backend device. 60 * It's set to -1 for the default software implementation. 61 * If valid, one socket can have 1 connection only. 62 */ 63 int vdpa_dev_id; 64 65 struct vhost_device_ops const *notify_ops; 66 }; 67 68 struct vhost_user_connection { 69 struct vhost_user_socket *vsocket; 70 int connfd; 71 int vid; 72 73 TAILQ_ENTRY(vhost_user_connection) next; 74 }; 75 76 #define MAX_VHOST_SOCKET 1024 77 struct vhost_user { 78 struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; 79 struct fdset fdset; 80 int vsocket_cnt; 81 pthread_mutex_t mutex; 82 }; 83 84 #define MAX_VIRTIO_BACKLOG 128 85 86 static void vhost_user_server_new_connection(int fd, void *data, int *remove); 87 static void vhost_user_read_cb(int fd, void *dat, int *remove); 88 static int create_unix_socket(struct vhost_user_socket *vsocket); 89 static int vhost_user_start_client(struct vhost_user_socket *vsocket); 90 91 static struct vhost_user vhost_user = { 92 .fdset = { 93 .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, 94 .fd_mutex = PTHREAD_MUTEX_INITIALIZER, 95 .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER, 96 .num = 0 97 }, 98 .vsocket_cnt = 0, 99 .mutex = PTHREAD_MUTEX_INITIALIZER, 100 }; 101 102 /* 103 * return bytes# of read on success or negative val on failure. Update fdnum 104 * with number of fds read. 105 */ 106 int 107 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, 108 int *fd_num) 109 { 110 struct iovec iov; 111 struct msghdr msgh; 112 char control[CMSG_SPACE(max_fds * sizeof(int))]; 113 struct cmsghdr *cmsg; 114 int got_fds = 0; 115 int ret; 116 117 *fd_num = 0; 118 119 memset(&msgh, 0, sizeof(msgh)); 120 iov.iov_base = buf; 121 iov.iov_len = buflen; 122 123 msgh.msg_iov = &iov; 124 msgh.msg_iovlen = 1; 125 msgh.msg_control = control; 126 msgh.msg_controllen = sizeof(control); 127 128 ret = recvmsg(sockfd, &msgh, 0); 129 if (ret <= 0) { 130 if (ret) 131 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); 132 return ret; 133 } 134 135 if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { 136 RTE_LOG(ERR, VHOST_CONFIG, "truncated msg\n"); 137 return -1; 138 } 139 140 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 141 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 142 if ((cmsg->cmsg_level == SOL_SOCKET) && 143 (cmsg->cmsg_type == SCM_RIGHTS)) { 144 got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 145 *fd_num = got_fds; 146 memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); 147 break; 148 } 149 } 150 151 /* Clear out unused file descriptors */ 152 while (got_fds < max_fds) 153 fds[got_fds++] = -1; 154 155 return ret; 156 } 157 158 int 159 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) 160 { 161 162 struct iovec iov; 163 struct msghdr msgh; 164 size_t fdsize = fd_num * sizeof(int); 165 char control[CMSG_SPACE(fdsize)]; 166 struct cmsghdr *cmsg; 167 int ret; 168 169 memset(&msgh, 0, sizeof(msgh)); 170 iov.iov_base = buf; 171 iov.iov_len = buflen; 172 173 msgh.msg_iov = &iov; 174 msgh.msg_iovlen = 1; 175 176 if (fds && fd_num > 0) { 177 msgh.msg_control = control; 178 msgh.msg_controllen = sizeof(control); 179 cmsg = CMSG_FIRSTHDR(&msgh); 180 if (cmsg == NULL) { 181 RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); 182 errno = EINVAL; 183 return -1; 184 } 185 cmsg->cmsg_len = CMSG_LEN(fdsize); 186 cmsg->cmsg_level = SOL_SOCKET; 187 cmsg->cmsg_type = SCM_RIGHTS; 188 memcpy(CMSG_DATA(cmsg), fds, fdsize); 189 } else { 190 msgh.msg_control = NULL; 191 msgh.msg_controllen = 0; 192 } 193 194 do { 195 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); 196 } while (ret < 0 && errno == EINTR); 197 198 if (ret < 0) { 199 RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); 200 return ret; 201 } 202 203 return ret; 204 } 205 206 static void 207 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) 208 { 209 int vid; 210 size_t size; 211 struct vhost_user_connection *conn; 212 int ret; 213 214 if (vsocket == NULL) 215 return; 216 217 conn = malloc(sizeof(*conn)); 218 if (conn == NULL) { 219 close(fd); 220 return; 221 } 222 223 vid = vhost_new_device(); 224 if (vid == -1) { 225 goto err; 226 } 227 228 size = strnlen(vsocket->path, PATH_MAX); 229 vhost_set_ifname(vid, vsocket->path, size); 230 231 vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); 232 233 vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id); 234 235 if (vsocket->dequeue_zero_copy) 236 vhost_enable_dequeue_zero_copy(vid); 237 238 if (vsocket->extbuf) 239 vhost_enable_extbuf(vid); 240 241 if (vsocket->linearbuf) 242 vhost_enable_linearbuf(vid); 243 244 RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); 245 246 if (vsocket->notify_ops->new_connection) { 247 ret = vsocket->notify_ops->new_connection(vid); 248 if (ret < 0) { 249 RTE_LOG(ERR, VHOST_CONFIG, 250 "failed to add vhost user connection with fd %d\n", 251 fd); 252 goto err_cleanup; 253 } 254 } 255 256 conn->connfd = fd; 257 conn->vsocket = vsocket; 258 conn->vid = vid; 259 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, 260 NULL, conn); 261 if (ret < 0) { 262 RTE_LOG(ERR, VHOST_CONFIG, 263 "failed to add fd %d into vhost server fdset\n", 264 fd); 265 266 if (vsocket->notify_ops->destroy_connection) 267 vsocket->notify_ops->destroy_connection(conn->vid); 268 269 goto err_cleanup; 270 } 271 272 pthread_mutex_lock(&vsocket->conn_mutex); 273 TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); 274 pthread_mutex_unlock(&vsocket->conn_mutex); 275 276 fdset_pipe_notify(&vhost_user.fdset); 277 return; 278 279 err_cleanup: 280 vhost_destroy_device(vid); 281 err: 282 free(conn); 283 close(fd); 284 } 285 286 /* call back when there is new vhost-user connection from client */ 287 static void 288 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) 289 { 290 struct vhost_user_socket *vsocket = dat; 291 292 fd = accept(fd, NULL, NULL); 293 if (fd < 0) 294 return; 295 296 RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); 297 vhost_user_add_connection(fd, vsocket); 298 } 299 300 static void 301 vhost_user_read_cb(int connfd, void *dat, int *remove) 302 { 303 struct vhost_user_connection *conn = dat; 304 struct vhost_user_socket *vsocket = conn->vsocket; 305 int ret; 306 307 ret = vhost_user_msg_handler(conn->vid, connfd); 308 if (ret < 0) { 309 struct virtio_net *dev = get_device(conn->vid); 310 311 close(connfd); 312 *remove = 1; 313 314 if (dev) 315 vhost_destroy_device_notify(dev); 316 317 if (vsocket->notify_ops->destroy_connection) 318 vsocket->notify_ops->destroy_connection(conn->vid); 319 320 vhost_destroy_device(conn->vid); 321 322 if (vsocket->reconnect) { 323 create_unix_socket(vsocket); 324 vhost_user_start_client(vsocket); 325 } 326 327 pthread_mutex_lock(&vsocket->conn_mutex); 328 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 329 pthread_mutex_unlock(&vsocket->conn_mutex); 330 331 free(conn); 332 } 333 } 334 335 static int 336 create_unix_socket(struct vhost_user_socket *vsocket) 337 { 338 int fd; 339 struct sockaddr_un *un = &vsocket->un; 340 341 fd = socket(AF_UNIX, SOCK_STREAM, 0); 342 if (fd < 0) 343 return -1; 344 RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", 345 vsocket->is_server ? "server" : "client", fd); 346 347 if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { 348 RTE_LOG(ERR, VHOST_CONFIG, 349 "vhost-user: can't set nonblocking mode for socket, fd: " 350 "%d (%s)\n", fd, strerror(errno)); 351 close(fd); 352 return -1; 353 } 354 355 memset(un, 0, sizeof(*un)); 356 un->sun_family = AF_UNIX; 357 strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); 358 un->sun_path[sizeof(un->sun_path) - 1] = '\0'; 359 360 vsocket->socket_fd = fd; 361 return 0; 362 } 363 364 static int 365 vhost_user_start_server(struct vhost_user_socket *vsocket) 366 { 367 int ret; 368 int fd = vsocket->socket_fd; 369 const char *path = vsocket->path; 370 371 /* 372 * bind () may fail if the socket file with the same name already 373 * exists. But the library obviously should not delete the file 374 * provided by the user, since we can not be sure that it is not 375 * being used by other applications. Moreover, many applications form 376 * socket names based on user input, which is prone to errors. 377 * 378 * The user must ensure that the socket does not exist before 379 * registering the vhost driver in server mode. 380 */ 381 ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); 382 if (ret < 0) { 383 RTE_LOG(ERR, VHOST_CONFIG, 384 "failed to bind to %s: %s; remove it and try again\n", 385 path, strerror(errno)); 386 goto err; 387 } 388 RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); 389 390 ret = listen(fd, MAX_VIRTIO_BACKLOG); 391 if (ret < 0) 392 goto err; 393 394 ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, 395 NULL, vsocket); 396 if (ret < 0) { 397 RTE_LOG(ERR, VHOST_CONFIG, 398 "failed to add listen fd %d to vhost server fdset\n", 399 fd); 400 goto err; 401 } 402 403 return 0; 404 405 err: 406 close(fd); 407 return -1; 408 } 409 410 struct vhost_user_reconnect { 411 struct sockaddr_un un; 412 int fd; 413 struct vhost_user_socket *vsocket; 414 415 TAILQ_ENTRY(vhost_user_reconnect) next; 416 }; 417 418 TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); 419 struct vhost_user_reconnect_list { 420 struct vhost_user_reconnect_tailq_list head; 421 pthread_mutex_t mutex; 422 }; 423 424 static struct vhost_user_reconnect_list reconn_list; 425 static pthread_t reconn_tid; 426 427 static int 428 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) 429 { 430 int ret, flags; 431 432 ret = connect(fd, un, sz); 433 if (ret < 0 && errno != EISCONN) 434 return -1; 435 436 flags = fcntl(fd, F_GETFL, 0); 437 if (flags < 0) { 438 RTE_LOG(ERR, VHOST_CONFIG, 439 "can't get flags for connfd %d\n", fd); 440 return -2; 441 } 442 if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { 443 RTE_LOG(ERR, VHOST_CONFIG, 444 "can't disable nonblocking on fd %d\n", fd); 445 return -2; 446 } 447 return 0; 448 } 449 450 static void * 451 vhost_user_client_reconnect(void *arg __rte_unused) 452 { 453 int ret; 454 struct vhost_user_reconnect *reconn, *next; 455 456 while (1) { 457 pthread_mutex_lock(&reconn_list.mutex); 458 459 /* 460 * An equal implementation of TAILQ_FOREACH_SAFE, 461 * which does not exist on all platforms. 462 */ 463 for (reconn = TAILQ_FIRST(&reconn_list.head); 464 reconn != NULL; reconn = next) { 465 next = TAILQ_NEXT(reconn, next); 466 467 ret = vhost_user_connect_nonblock(reconn->fd, 468 (struct sockaddr *)&reconn->un, 469 sizeof(reconn->un)); 470 if (ret == -2) { 471 close(reconn->fd); 472 RTE_LOG(ERR, VHOST_CONFIG, 473 "reconnection for fd %d failed\n", 474 reconn->fd); 475 goto remove_fd; 476 } 477 if (ret == -1) 478 continue; 479 480 RTE_LOG(INFO, VHOST_CONFIG, 481 "%s: connected\n", reconn->vsocket->path); 482 vhost_user_add_connection(reconn->fd, reconn->vsocket); 483 remove_fd: 484 TAILQ_REMOVE(&reconn_list.head, reconn, next); 485 free(reconn); 486 } 487 488 pthread_mutex_unlock(&reconn_list.mutex); 489 sleep(1); 490 } 491 492 return NULL; 493 } 494 495 static int 496 vhost_user_reconnect_init(void) 497 { 498 int ret; 499 500 ret = pthread_mutex_init(&reconn_list.mutex, NULL); 501 if (ret < 0) { 502 RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex"); 503 return ret; 504 } 505 TAILQ_INIT(&reconn_list.head); 506 507 ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, 508 vhost_user_client_reconnect, NULL); 509 if (ret != 0) { 510 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); 511 if (pthread_mutex_destroy(&reconn_list.mutex)) { 512 RTE_LOG(ERR, VHOST_CONFIG, 513 "failed to destroy reconnect mutex"); 514 } 515 } 516 517 return ret; 518 } 519 520 static int 521 vhost_user_start_client(struct vhost_user_socket *vsocket) 522 { 523 int ret; 524 int fd = vsocket->socket_fd; 525 const char *path = vsocket->path; 526 struct vhost_user_reconnect *reconn; 527 528 ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un, 529 sizeof(vsocket->un)); 530 if (ret == 0) { 531 vhost_user_add_connection(fd, vsocket); 532 return 0; 533 } 534 535 RTE_LOG(WARNING, VHOST_CONFIG, 536 "failed to connect to %s: %s\n", 537 path, strerror(errno)); 538 539 if (ret == -2 || !vsocket->reconnect) { 540 close(fd); 541 return -1; 542 } 543 544 RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); 545 reconn = malloc(sizeof(*reconn)); 546 if (reconn == NULL) { 547 RTE_LOG(ERR, VHOST_CONFIG, 548 "failed to allocate memory for reconnect\n"); 549 close(fd); 550 return -1; 551 } 552 reconn->un = vsocket->un; 553 reconn->fd = fd; 554 reconn->vsocket = vsocket; 555 pthread_mutex_lock(&reconn_list.mutex); 556 TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); 557 pthread_mutex_unlock(&reconn_list.mutex); 558 559 return 0; 560 } 561 562 static struct vhost_user_socket * 563 find_vhost_user_socket(const char *path) 564 { 565 int i; 566 567 if (path == NULL) 568 return NULL; 569 570 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 571 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 572 573 if (!strcmp(vsocket->path, path)) 574 return vsocket; 575 } 576 577 return NULL; 578 } 579 580 int 581 rte_vhost_driver_attach_vdpa_device(const char *path, int did) 582 { 583 struct vhost_user_socket *vsocket; 584 585 if (rte_vdpa_get_device(did) == NULL || path == NULL) 586 return -1; 587 588 pthread_mutex_lock(&vhost_user.mutex); 589 vsocket = find_vhost_user_socket(path); 590 if (vsocket) 591 vsocket->vdpa_dev_id = did; 592 pthread_mutex_unlock(&vhost_user.mutex); 593 594 return vsocket ? 0 : -1; 595 } 596 597 int 598 rte_vhost_driver_detach_vdpa_device(const char *path) 599 { 600 struct vhost_user_socket *vsocket; 601 602 pthread_mutex_lock(&vhost_user.mutex); 603 vsocket = find_vhost_user_socket(path); 604 if (vsocket) 605 vsocket->vdpa_dev_id = -1; 606 pthread_mutex_unlock(&vhost_user.mutex); 607 608 return vsocket ? 0 : -1; 609 } 610 611 int 612 rte_vhost_driver_get_vdpa_device_id(const char *path) 613 { 614 struct vhost_user_socket *vsocket; 615 int did = -1; 616 617 pthread_mutex_lock(&vhost_user.mutex); 618 vsocket = find_vhost_user_socket(path); 619 if (vsocket) 620 did = vsocket->vdpa_dev_id; 621 pthread_mutex_unlock(&vhost_user.mutex); 622 623 return did; 624 } 625 626 int 627 rte_vhost_driver_disable_features(const char *path, uint64_t features) 628 { 629 struct vhost_user_socket *vsocket; 630 631 pthread_mutex_lock(&vhost_user.mutex); 632 vsocket = find_vhost_user_socket(path); 633 634 /* Note that use_builtin_virtio_net is not affected by this function 635 * since callers may want to selectively disable features of the 636 * built-in vhost net device backend. 637 */ 638 639 if (vsocket) 640 vsocket->features &= ~features; 641 pthread_mutex_unlock(&vhost_user.mutex); 642 643 return vsocket ? 0 : -1; 644 } 645 646 int 647 rte_vhost_driver_enable_features(const char *path, uint64_t features) 648 { 649 struct vhost_user_socket *vsocket; 650 651 pthread_mutex_lock(&vhost_user.mutex); 652 vsocket = find_vhost_user_socket(path); 653 if (vsocket) { 654 if ((vsocket->supported_features & features) != features) { 655 /* 656 * trying to enable features the driver doesn't 657 * support. 658 */ 659 pthread_mutex_unlock(&vhost_user.mutex); 660 return -1; 661 } 662 vsocket->features |= features; 663 } 664 pthread_mutex_unlock(&vhost_user.mutex); 665 666 return vsocket ? 0 : -1; 667 } 668 669 int 670 rte_vhost_driver_set_features(const char *path, uint64_t features) 671 { 672 struct vhost_user_socket *vsocket; 673 674 pthread_mutex_lock(&vhost_user.mutex); 675 vsocket = find_vhost_user_socket(path); 676 if (vsocket) { 677 vsocket->supported_features = features; 678 vsocket->features = features; 679 680 /* Anyone setting feature bits is implementing their own vhost 681 * device backend. 682 */ 683 vsocket->use_builtin_virtio_net = false; 684 } 685 pthread_mutex_unlock(&vhost_user.mutex); 686 687 return vsocket ? 0 : -1; 688 } 689 690 int 691 rte_vhost_driver_get_features(const char *path, uint64_t *features) 692 { 693 struct vhost_user_socket *vsocket; 694 uint64_t vdpa_features; 695 struct rte_vdpa_device *vdpa_dev; 696 int did = -1; 697 int ret = 0; 698 699 pthread_mutex_lock(&vhost_user.mutex); 700 vsocket = find_vhost_user_socket(path); 701 if (!vsocket) { 702 RTE_LOG(ERR, VHOST_CONFIG, 703 "socket file %s is not registered yet.\n", path); 704 ret = -1; 705 goto unlock_exit; 706 } 707 708 did = vsocket->vdpa_dev_id; 709 vdpa_dev = rte_vdpa_get_device(did); 710 if (!vdpa_dev || !vdpa_dev->ops->get_features) { 711 *features = vsocket->features; 712 goto unlock_exit; 713 } 714 715 if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) { 716 RTE_LOG(ERR, VHOST_CONFIG, 717 "failed to get vdpa features " 718 "for socket file %s.\n", path); 719 ret = -1; 720 goto unlock_exit; 721 } 722 723 *features = vsocket->features & vdpa_features; 724 725 unlock_exit: 726 pthread_mutex_unlock(&vhost_user.mutex); 727 return ret; 728 } 729 730 int 731 rte_vhost_driver_set_protocol_features(const char *path, 732 uint64_t protocol_features) 733 { 734 struct vhost_user_socket *vsocket; 735 736 pthread_mutex_lock(&vhost_user.mutex); 737 vsocket = find_vhost_user_socket(path); 738 if (vsocket) 739 vsocket->protocol_features = protocol_features; 740 pthread_mutex_unlock(&vhost_user.mutex); 741 return vsocket ? 0 : -1; 742 } 743 744 int 745 rte_vhost_driver_get_protocol_features(const char *path, 746 uint64_t *protocol_features) 747 { 748 struct vhost_user_socket *vsocket; 749 uint64_t vdpa_protocol_features; 750 struct rte_vdpa_device *vdpa_dev; 751 int did = -1; 752 int ret = 0; 753 754 pthread_mutex_lock(&vhost_user.mutex); 755 vsocket = find_vhost_user_socket(path); 756 if (!vsocket) { 757 RTE_LOG(ERR, VHOST_CONFIG, 758 "socket file %s is not registered yet.\n", path); 759 ret = -1; 760 goto unlock_exit; 761 } 762 763 did = vsocket->vdpa_dev_id; 764 vdpa_dev = rte_vdpa_get_device(did); 765 if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { 766 *protocol_features = vsocket->protocol_features; 767 goto unlock_exit; 768 } 769 770 if (vdpa_dev->ops->get_protocol_features(did, 771 &vdpa_protocol_features) < 0) { 772 RTE_LOG(ERR, VHOST_CONFIG, 773 "failed to get vdpa protocol features " 774 "for socket file %s.\n", path); 775 ret = -1; 776 goto unlock_exit; 777 } 778 779 *protocol_features = vsocket->protocol_features 780 & vdpa_protocol_features; 781 782 unlock_exit: 783 pthread_mutex_unlock(&vhost_user.mutex); 784 return ret; 785 } 786 787 int 788 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) 789 { 790 struct vhost_user_socket *vsocket; 791 uint32_t vdpa_queue_num; 792 struct rte_vdpa_device *vdpa_dev; 793 int did = -1; 794 int ret = 0; 795 796 pthread_mutex_lock(&vhost_user.mutex); 797 vsocket = find_vhost_user_socket(path); 798 if (!vsocket) { 799 RTE_LOG(ERR, VHOST_CONFIG, 800 "socket file %s is not registered yet.\n", path); 801 ret = -1; 802 goto unlock_exit; 803 } 804 805 did = vsocket->vdpa_dev_id; 806 vdpa_dev = rte_vdpa_get_device(did); 807 if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) { 808 *queue_num = VHOST_MAX_QUEUE_PAIRS; 809 goto unlock_exit; 810 } 811 812 if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) { 813 RTE_LOG(ERR, VHOST_CONFIG, 814 "failed to get vdpa queue number " 815 "for socket file %s.\n", path); 816 ret = -1; 817 goto unlock_exit; 818 } 819 820 *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num); 821 822 unlock_exit: 823 pthread_mutex_unlock(&vhost_user.mutex); 824 return ret; 825 } 826 827 static void 828 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) 829 { 830 if (vsocket && vsocket->path) { 831 free(vsocket->path); 832 vsocket->path = NULL; 833 } 834 835 if (vsocket) { 836 free(vsocket); 837 vsocket = NULL; 838 } 839 } 840 841 /* 842 * Register a new vhost-user socket; here we could act as server 843 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag 844 * is set. 845 */ 846 int 847 rte_vhost_driver_register(const char *path, uint64_t flags) 848 { 849 int ret = -1; 850 struct vhost_user_socket *vsocket; 851 852 if (!path) 853 return -1; 854 855 pthread_mutex_lock(&vhost_user.mutex); 856 857 if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { 858 RTE_LOG(ERR, VHOST_CONFIG, 859 "error: the number of vhost sockets reaches maximum\n"); 860 goto out; 861 } 862 863 vsocket = malloc(sizeof(struct vhost_user_socket)); 864 if (!vsocket) 865 goto out; 866 memset(vsocket, 0, sizeof(struct vhost_user_socket)); 867 vsocket->path = strdup(path); 868 if (vsocket->path == NULL) { 869 RTE_LOG(ERR, VHOST_CONFIG, 870 "error: failed to copy socket path string\n"); 871 vhost_user_socket_mem_free(vsocket); 872 goto out; 873 } 874 TAILQ_INIT(&vsocket->conn_list); 875 ret = pthread_mutex_init(&vsocket->conn_mutex, NULL); 876 if (ret) { 877 RTE_LOG(ERR, VHOST_CONFIG, 878 "error: failed to init connection mutex\n"); 879 goto out_free; 880 } 881 vsocket->vdpa_dev_id = -1; 882 vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; 883 vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT; 884 vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT; 885 886 if (vsocket->dequeue_zero_copy && 887 (flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { 888 RTE_LOG(ERR, VHOST_CONFIG, 889 "error: enabling dequeue zero copy and IOMMU features " 890 "simultaneously is not supported\n"); 891 goto out_mutex; 892 } 893 894 /* 895 * Set the supported features correctly for the builtin vhost-user 896 * net driver. 897 * 898 * Applications know nothing about features the builtin virtio net 899 * driver (virtio_net.c) supports, thus it's not possible for them 900 * to invoke rte_vhost_driver_set_features(). To workaround it, here 901 * we set it unconditionally. If the application want to implement 902 * another vhost-user driver (say SCSI), it should call the 903 * rte_vhost_driver_set_features(), which will overwrite following 904 * two values. 905 */ 906 vsocket->use_builtin_virtio_net = true; 907 vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; 908 vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; 909 vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES; 910 911 /* 912 * Dequeue zero copy can't assure descriptors returned in order. 913 * Also, it requires that the guest memory is populated, which is 914 * not compatible with postcopy. 915 */ 916 if (vsocket->dequeue_zero_copy) { 917 if (vsocket->extbuf) { 918 RTE_LOG(ERR, VHOST_CONFIG, 919 "error: zero copy is incompatible with external buffers\n"); 920 ret = -1; 921 goto out_mutex; 922 } 923 if (vsocket->linearbuf) { 924 RTE_LOG(ERR, VHOST_CONFIG, 925 "error: zero copy is incompatible with linear buffers\n"); 926 ret = -1; 927 goto out_mutex; 928 } 929 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); 930 vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); 931 932 RTE_LOG(INFO, VHOST_CONFIG, 933 "Dequeue zero copy requested, disabling postcopy support\n"); 934 vsocket->protocol_features &= 935 ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT); 936 } 937 938 /* 939 * We'll not be able to receive a buffer from guest in linear mode 940 * without external buffer if it will not fit in a single mbuf, which is 941 * likely if segmentation offloading enabled. 942 */ 943 if (vsocket->linearbuf && !vsocket->extbuf) { 944 uint64_t seg_offload_features = 945 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 946 (1ULL << VIRTIO_NET_F_HOST_TSO6) | 947 (1ULL << VIRTIO_NET_F_HOST_UFO); 948 949 RTE_LOG(INFO, VHOST_CONFIG, 950 "Linear buffers requested without external buffers, " 951 "disabling host segmentation offloading support\n"); 952 vsocket->supported_features &= ~seg_offload_features; 953 vsocket->features &= ~seg_offload_features; 954 } 955 956 if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { 957 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 958 vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 959 } 960 961 if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) { 962 vsocket->protocol_features &= 963 ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT); 964 } else { 965 #ifndef RTE_LIBRTE_VHOST_POSTCOPY 966 RTE_LOG(ERR, VHOST_CONFIG, 967 "Postcopy requested but not compiled\n"); 968 ret = -1; 969 goto out_mutex; 970 #endif 971 } 972 973 if ((flags & RTE_VHOST_USER_CLIENT) != 0) { 974 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); 975 if (vsocket->reconnect && reconn_tid == 0) { 976 if (vhost_user_reconnect_init() != 0) 977 goto out_mutex; 978 } 979 } else { 980 vsocket->is_server = true; 981 } 982 ret = create_unix_socket(vsocket); 983 if (ret < 0) { 984 goto out_mutex; 985 } 986 987 vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; 988 989 pthread_mutex_unlock(&vhost_user.mutex); 990 return ret; 991 992 out_mutex: 993 if (pthread_mutex_destroy(&vsocket->conn_mutex)) { 994 RTE_LOG(ERR, VHOST_CONFIG, 995 "error: failed to destroy connection mutex\n"); 996 } 997 out_free: 998 vhost_user_socket_mem_free(vsocket); 999 out: 1000 pthread_mutex_unlock(&vhost_user.mutex); 1001 1002 return ret; 1003 } 1004 1005 static bool 1006 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) 1007 { 1008 int found = false; 1009 struct vhost_user_reconnect *reconn, *next; 1010 1011 pthread_mutex_lock(&reconn_list.mutex); 1012 1013 for (reconn = TAILQ_FIRST(&reconn_list.head); 1014 reconn != NULL; reconn = next) { 1015 next = TAILQ_NEXT(reconn, next); 1016 1017 if (reconn->vsocket == vsocket) { 1018 TAILQ_REMOVE(&reconn_list.head, reconn, next); 1019 close(reconn->fd); 1020 free(reconn); 1021 found = true; 1022 break; 1023 } 1024 } 1025 pthread_mutex_unlock(&reconn_list.mutex); 1026 return found; 1027 } 1028 1029 /** 1030 * Unregister the specified vhost socket 1031 */ 1032 int 1033 rte_vhost_driver_unregister(const char *path) 1034 { 1035 int i; 1036 int count; 1037 struct vhost_user_connection *conn, *next; 1038 1039 if (path == NULL) 1040 return -1; 1041 1042 again: 1043 pthread_mutex_lock(&vhost_user.mutex); 1044 1045 for (i = 0; i < vhost_user.vsocket_cnt; i++) { 1046 struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; 1047 1048 if (!strcmp(vsocket->path, path)) { 1049 pthread_mutex_lock(&vsocket->conn_mutex); 1050 for (conn = TAILQ_FIRST(&vsocket->conn_list); 1051 conn != NULL; 1052 conn = next) { 1053 next = TAILQ_NEXT(conn, next); 1054 1055 /* 1056 * If r/wcb is executing, release vsocket's 1057 * conn_mutex and vhost_user's mutex locks, and 1058 * try again since the r/wcb may use the 1059 * conn_mutex and mutex locks. 1060 */ 1061 if (fdset_try_del(&vhost_user.fdset, 1062 conn->connfd) == -1) { 1063 pthread_mutex_unlock( 1064 &vsocket->conn_mutex); 1065 pthread_mutex_unlock(&vhost_user.mutex); 1066 goto again; 1067 } 1068 1069 RTE_LOG(INFO, VHOST_CONFIG, 1070 "free connfd = %d for device '%s'\n", 1071 conn->connfd, path); 1072 close(conn->connfd); 1073 vhost_destroy_device(conn->vid); 1074 TAILQ_REMOVE(&vsocket->conn_list, conn, next); 1075 free(conn); 1076 } 1077 pthread_mutex_unlock(&vsocket->conn_mutex); 1078 1079 if (vsocket->is_server) { 1080 /* 1081 * If r/wcb is executing, release vhost_user's 1082 * mutex lock, and try again since the r/wcb 1083 * may use the mutex lock. 1084 */ 1085 if (fdset_try_del(&vhost_user.fdset, 1086 vsocket->socket_fd) == -1) { 1087 pthread_mutex_unlock(&vhost_user.mutex); 1088 goto again; 1089 } 1090 1091 close(vsocket->socket_fd); 1092 unlink(path); 1093 } else if (vsocket->reconnect) { 1094 vhost_user_remove_reconnect(vsocket); 1095 } 1096 1097 pthread_mutex_destroy(&vsocket->conn_mutex); 1098 vhost_user_socket_mem_free(vsocket); 1099 1100 count = --vhost_user.vsocket_cnt; 1101 vhost_user.vsockets[i] = vhost_user.vsockets[count]; 1102 vhost_user.vsockets[count] = NULL; 1103 pthread_mutex_unlock(&vhost_user.mutex); 1104 1105 return 0; 1106 } 1107 } 1108 pthread_mutex_unlock(&vhost_user.mutex); 1109 1110 return -1; 1111 } 1112 1113 /* 1114 * Register ops so that we can add/remove device to data core. 1115 */ 1116 int 1117 rte_vhost_driver_callback_register(const char *path, 1118 struct vhost_device_ops const * const ops) 1119 { 1120 struct vhost_user_socket *vsocket; 1121 1122 pthread_mutex_lock(&vhost_user.mutex); 1123 vsocket = find_vhost_user_socket(path); 1124 if (vsocket) 1125 vsocket->notify_ops = ops; 1126 pthread_mutex_unlock(&vhost_user.mutex); 1127 1128 return vsocket ? 0 : -1; 1129 } 1130 1131 struct vhost_device_ops const * 1132 vhost_driver_callback_get(const char *path) 1133 { 1134 struct vhost_user_socket *vsocket; 1135 1136 pthread_mutex_lock(&vhost_user.mutex); 1137 vsocket = find_vhost_user_socket(path); 1138 pthread_mutex_unlock(&vhost_user.mutex); 1139 1140 return vsocket ? vsocket->notify_ops : NULL; 1141 } 1142 1143 int 1144 rte_vhost_driver_start(const char *path) 1145 { 1146 struct vhost_user_socket *vsocket; 1147 static pthread_t fdset_tid; 1148 1149 pthread_mutex_lock(&vhost_user.mutex); 1150 vsocket = find_vhost_user_socket(path); 1151 pthread_mutex_unlock(&vhost_user.mutex); 1152 1153 if (!vsocket) 1154 return -1; 1155 1156 if (fdset_tid == 0) { 1157 /** 1158 * create a pipe which will be waited by poll and notified to 1159 * rebuild the wait list of poll. 1160 */ 1161 if (fdset_pipe_init(&vhost_user.fdset) < 0) { 1162 RTE_LOG(ERR, VHOST_CONFIG, 1163 "failed to create pipe for vhost fdset\n"); 1164 return -1; 1165 } 1166 1167 int ret = rte_ctrl_thread_create(&fdset_tid, 1168 "vhost-events", NULL, fdset_event_dispatch, 1169 &vhost_user.fdset); 1170 if (ret != 0) { 1171 RTE_LOG(ERR, VHOST_CONFIG, 1172 "failed to create fdset handling thread"); 1173 1174 fdset_pipe_uninit(&vhost_user.fdset); 1175 return -1; 1176 } 1177 } 1178 1179 if (vsocket->is_server) 1180 return vhost_user_start_server(vsocket); 1181 else 1182 return vhost_user_start_client(vsocket); 1183 } 1184