/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>

#include <rte_log.h>

#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"


TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	/* Live connections on this socket, guarded by conn_mutex. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	/* Unix socket path, strdup'd at registration time. */
	char *path;
	/* Listen fd (server mode) or connect fd (client mode). */
	int socket_fd;
	struct sockaddr_un un;
	bool is_server;
	/* Client mode only: retry the connection in the background. */
	bool reconnect;
	bool dequeue_zero_copy;
	bool iommu_support;
	/* True when the built-in virtio-net backend handles this socket. */
	bool use_builtin_virtio_net;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/*
	 * Device id to identify a specific backend device.
	 * It's set to -1 for the default software implementation.
	 * If valid, one socket can have 1 connection only.
	 */
	int vdpa_dev_id;

	/* Application callbacks (new_connection, destroy_connection, ...). */
	struct vhost_device_ops const *notify_ops;
};

/* One accepted/established vhost-user connection bound to a device id. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;
	int connfd;
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};

#define MAX_VHOST_SOCKET 1024
/* Global registry of vhost-user sockets, guarded by "mutex". */
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	pthread_mutex_t mutex;
};

#define MAX_VIRTIO_BACKLOG 128

static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};

/*
 * return bytes# of read on success or negative val on failure. Update fdnum
 * with number of fds read.
 */
int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
		int *fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	/* Ancillary-data buffer sized for up to max_fds SCM_RIGHTS fds. */
	char control[CMSG_SPACE(max_fds * sizeof(int))];
	struct cmsghdr *cmsg;
	int got_fds = 0;
	int ret;

	*fd_num = 0;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
		return ret;
	}

	/* Reject messages whose data or control part was truncated.
	 * (Log text "truncted" is a pre-existing typo, kept as-is.) */
	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
		return -1;
	}

	/* Scan control messages for file descriptors passed by the peer. */
	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
			(cmsg->cmsg_type == SCM_RIGHTS)) {
			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			*fd_num = got_fds;
			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
			break;
		}
	}

	/* Clear out unused file descriptors */
	while (got_fds < max_fds)
		fds[got_fds++] = -1;

	return ret;
}

/*
 * Send a message over sockfd, optionally passing fd_num file descriptors
 * as SCM_RIGHTS ancillary data. Retries on EINTR.
 * Returns bytes sent on success, or a negative value on failure.
 */
int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{

	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds && fd_num > 0) {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
			errno = EINVAL;
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	} else {
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	}

	do {
		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
		return ret;
	}

	return ret;
}

/*
 * Create a new vhost device for a freshly established connection and
 * register the connection fd in the global fdset so that vhost-user
 * messages are dispatched to vhost_user_read_cb().
 * Takes ownership of fd: it is closed on any failure path.
 */
static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;

	if (vsocket == NULL)
		return;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1) {
		goto err;
	}

	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);

	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id);

	if (vsocket->dequeue_zero_copy)
		vhost_enable_dequeue_zero_copy(vid);

	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);

	if (vsocket->notify_ops->new_connection) {
		ret = vsocket->notify_ops->new_connection(vid);
		if (ret < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to add vhost user connection with fd %d\n",
				fd);
			goto err;
		}
	}

	conn->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add fd %d into vhost server fdset\n",
			fd);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		goto err;
	}

	pthread_mutex_lock(&vsocket->conn_mutex);
	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
	pthread_mutex_unlock(&vsocket->conn_mutex);

	/* Wake the fdset polling thread so it rebuilds its wait list. */
	fdset_pipe_notify(&vhost_user.fdset);
	return;

err:
	/*
	 * NOTE(review): once vhost_new_device() has succeeded, the error
	 * paths below free conn and close fd but never call
	 * vhost_destroy_device(vid) — the device appears to leak. Verify
	 * against later upstream fixes before changing.
	 */
	free(conn);
	close(fd);
}

/* call back when there is new vhost-user connection from client */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;

	fd = accept(fd, NULL, NULL);
	if (fd < 0)
		return;

	RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
	vhost_user_add_connection(fd, vsocket);
}

/*
 * fdset read callback for an established connection: dispatch one
 * vhost-user message; on failure tear the connection down and, in
 * client mode with reconnect enabled, re-arm the reconnection logic.
 */
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		close(connfd);
		/* Tell the fdset dispatcher to drop this fd. */
		*remove = 1;
		vhost_destroy_device(conn->vid);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);

		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}
	}
}

/*
 * Create the AF_UNIX stream socket and fill in vsocket->un.
 * Client sockets are made non-blocking so the initial connect()
 * cannot stall; returns 0 on success, -1 on failure.
 */
static int
create_unix_socket(struct vhost_user_socket *vsocket)
{
	int fd;
	struct sockaddr_un *un = &vsocket->un;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
		vsocket->is_server ? "server" : "client", fd);

	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost-user: can't set nonblocking mode for socket, fd: "
			"%d (%s)\n", fd, strerror(errno));
		close(fd);
		return -1;
	}

	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
	/* strncpy does not guarantee termination; force it. */
	un->sun_path[sizeof(un->sun_path) - 1] = '\0';

	vsocket->socket_fd = fd;
	return 0;
}

/*
 * Server mode: bind the already-created socket, listen, and register
 * the listen fd so new connections are accepted from the fdset thread.
 */
static int
vhost_user_start_server(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;

	/*
	 * bind () may fail if the socket file with the same name already
	 * exists. But the library obviously should not delete the file
	 * provided by the user, since we can not be sure that it is not
	 * being used by other applications. Moreover, many applications form
	 * socket names based on user input, which is prone to errors.
	 *
	 * The user must ensure that the socket does not exist before
	 * registering the vhost driver in server mode.
	 */
	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to bind to %s: %s; remove it and try again\n",
			path, strerror(errno));
		goto err;
	}
	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);

	ret = listen(fd, MAX_VIRTIO_BACKLOG);
	if (ret < 0)
		goto err;

	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
		  NULL, vsocket);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add listen fd %d to vhost server fdset\n",
			fd);
		goto err;
	}

	return 0;

err:
	close(fd);
	return -1;
}

/* Pending client reconnection: fd + address to retry, owning socket. */
struct vhost_user_reconnect {
	struct sockaddr_un un;
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

/* Shared between registering threads and the reconnect thread. */
static struct vhost_user_reconnect_list reconn_list;
static pthread_t reconn_tid;

/*
 * Try to complete a non-blocking connect and restore blocking mode on
 * success. Return values:
 *   0  connected (fd switched back to blocking),
 *  -1  not connected yet / transient failure (caller may retry),
 *  -2  fatal fcntl failure (caller should give up on this fd).
 */
static int
vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
{
	int ret, flags;

	ret = connect(fd, un, sz);
	/* EISCONN means an earlier in-progress connect has completed. */
	if (ret < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"can't get flags for connfd %d\n", fd);
		return -2;
	}
	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"can't disable nonblocking on fd %d\n", fd);
		return -2;
	}
	return 0;
}

/*
 * Body of the reconnect control thread: once a second, walk the
 * pending-reconnect list and retry each connection.
 */
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equal implementation of TAILQ_FOREACH_SAFE,
		 * which does not exist on all
		 * platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
				reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->fd,
					(struct sockaddr *)&reconn->un,
					sizeof(reconn->un));
			if (ret == -2) {
				/* Fatal: drop this entry entirely. */
				close(reconn->fd);
				RTE_LOG(ERR, VHOST_CONFIG,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			if (ret == -1)
				continue;

			RTE_LOG(INFO, VHOST_CONFIG,
				"%s: connected\n", reconn->vsocket->path);
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return NULL;
}

/*
 * One-time setup of the reconnect machinery: init the list mutex and
 * spawn the reconnect control thread. Returns 0 on success.
 */
static int
vhost_user_reconnect_init(void)
{
	int ret;

	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
	/*
	 * NOTE(review): pthread_mutex_init() reports failure with a
	 * positive errno value, so this "< 0" test can never fire — verify
	 * whether "ret != 0" was intended.
	 */
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex");
		return ret;
	}
	TAILQ_INIT(&reconn_list.head);

	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
		if (pthread_mutex_destroy(&reconn_list.mutex)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to destroy reconnect mutex");
		}
	}

	return ret;
}

/*
 * Client mode: attempt to connect immediately; if that fails with a
 * retryable error and reconnect is enabled, queue the fd on the
 * reconnect list for the background thread to retry.
 */
static int
vhost_user_start_client(struct vhost_user_socket *vsocket)
{
	int ret;
	int fd = vsocket->socket_fd;
	const char *path = vsocket->path;
	struct vhost_user_reconnect *reconn;

	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
					  sizeof(vsocket->un));
	if (ret == 0) {
		vhost_user_add_connection(fd, vsocket);
		return 0;
	}

	RTE_LOG(WARNING, VHOST_CONFIG,
		"failed to connect to %s: %s\n",
		path, strerror(errno));

	/* -2 is a fatal fcntl error: give up even if reconnect is set. */
	if (ret == -2 || !vsocket->reconnect) {
		close(fd);
		return -1;
	}

	RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
	reconn = malloc(sizeof(*reconn));
	if (reconn == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to allocate memory for reconnect\n");
		close(fd);
		return -1;
	}
	reconn->un = vsocket->un;
	reconn->fd = fd;
	reconn->vsocket = vsocket;
	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}

/*
 * Linear lookup of a registered socket by path.
 * Caller must hold vhost_user.mutex.
 */
static struct vhost_user_socket *
find_vhost_user_socket(const char *path)
{
	int i;

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path))
			return vsocket;
	}

	return NULL;
}

/* Bind a vDPA backend device id to the socket at @path. 0 on success. */
int
rte_vhost_driver_attach_vdpa_device(const char *path, int did)
{
	struct vhost_user_socket *vsocket;

	/* Reject ids that do not map to a registered vDPA device. */
	if (rte_vdpa_get_device(did) == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev_id = did;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Detach any vDPA device from the socket (reset id to -1). */
int
rte_vhost_driver_detach_vdpa_device(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->vdpa_dev_id = -1;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Return the vDPA device id for @path, or -1 if unknown/not attached. */
int
rte_vhost_driver_get_vdpa_device_id(const char *path)
{
	struct vhost_user_socket *vsocket;
	int did = -1;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		did = vsocket->vdpa_dev_id;
	pthread_mutex_unlock(&vhost_user.mutex);

	return did;
}

/* Clear the given feature bits from the socket's negotiated set. */
int
rte_vhost_driver_disable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);

	/* Note that use_builtin_virtio_net is not affected by this function
	 * since callers may want to selectively disable features of the
	 * built-in vhost net device backend.
	 */

	if (vsocket)
		vsocket->features &= ~features;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/*
 * Set the given feature bits, but only if they are all within the
 * driver's supported set; otherwise fail without changing anything.
 */
int
rte_vhost_driver_enable_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		if ((vsocket->supported_features & features) != features) {
			/*
			 * trying to enable features the driver doesn't
			 * support.
			 */
			pthread_mutex_unlock(&vhost_user.mutex);
			return -1;
		}
		vsocket->features |= features;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Replace both supported and current feature sets wholesale. */
int
rte_vhost_driver_set_features(const char *path, uint64_t features)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket) {
		vsocket->supported_features = features;
		vsocket->features = features;

		/* Anyone setting feature bits is implementing their own vhost
		 * device backend.
		 */
		vsocket->use_builtin_virtio_net = false;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/*
 * Report the negotiable feature set for @path. When a vDPA device is
 * attached and exposes get_features, the result is the intersection of
 * the socket's features and the vDPA device's features.
 * Returns 0 on success, -1 on error.
 */
int
rte_vhost_driver_get_features(const char *path, uint64_t *features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_features;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_features) {
		/* No vDPA device: the socket's own features are final. */
		*features = vsocket->features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*features = vsocket->features & vdpa_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

/*
 * Same pattern as rte_vhost_driver_get_features() but for the
 * vhost-user protocol feature bits.
 */
int
rte_vhost_driver_get_protocol_features(const char *path,
		uint64_t *protocol_features)
{
	struct vhost_user_socket *vsocket;
	uint64_t vdpa_protocol_features;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
		*protocol_features = vsocket->protocol_features;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_protocol_features(did,
				&vdpa_protocol_features) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa protocol features "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*protocol_features = vsocket->protocol_features
		& vdpa_protocol_features;

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

/*
 * Report the queue-pair count for @path: the vDPA device's count when
 * one is attached (capped at VHOST_MAX_QUEUE_PAIRS), otherwise
 * VHOST_MAX_QUEUE_PAIRS. Returns 0 on success, -1 on error.
 */
int
rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
{
	struct vhost_user_socket *vsocket;
	uint32_t vdpa_queue_num;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret = 0;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (!vsocket) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"socket file %s is not registered yet.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	did = vsocket->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) {
		*queue_num = VHOST_MAX_QUEUE_PAIRS;
		goto unlock_exit;
	}

	if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get vdpa queue number "
				"for socket file %s.\n", path);
		ret = -1;
		goto unlock_exit;
	}

	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);

unlock_exit:
	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;
}

/* Free a vhost_user_socket and its owned path string (NULL-safe). */
static void
vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
{
	if (vsocket && vsocket->path) {
		free(vsocket->path);
		vsocket->path = NULL;
	}

	if (vsocket) {
		free(vsocket);
		vsocket = NULL;
	}
}

/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
 * is set.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to copy socket path string\n");
		vhost_user_socket_mem_free(vsocket);
		goto out;
	}
	TAILQ_INIT(&vsocket->conn_list);
	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to init connection mutex\n");
		goto out_free;
	}
	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

	/*
	 * Set the supported features correctly for the builtin vhost-user
	 * net driver.
	 *
	 * Applications know nothing about features the builtin virtio net
	 * driver (virtio_net.c) supports, thus it's not possible for them
	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
	 * we set it unconditionally. If the application want to implement
	 * another vhost-user driver (say SCSI), it should call the
	 * rte_vhost_driver_set_features(), which will overwrite following
	 * two values.
	 */
	vsocket->use_builtin_virtio_net = true;
	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
	vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES;

	/*
	 * Dequeue zero copy can't assure descriptors returned in order.
	 * Also, it requires that the guest memory is populated, which is
	 * not compatible with postcopy.
	 */
	if (vsocket->dequeue_zero_copy) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
		vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);

		RTE_LOG(INFO, VHOST_CONFIG,
			"Dequeue zero copy requested, disabling postcopy support\n");
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	}

	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
	}

	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
		vsocket->protocol_features &=
			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
	} else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
		/* Postcopy was requested but support is compiled out. */
		RTE_LOG(ERR, VHOST_CONFIG,
			"Postcopy requested but not compiled\n");
		ret = -1;
		goto out_mutex;
#endif
	}

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		/* Lazily start the reconnect thread on first client socket. */
		if (vsocket->reconnect && reconn_tid == 0) {
			if (vhost_user_reconnect_init() != 0)
				goto out_mutex;
		}
	} else {
		vsocket->is_server = true;
	}
	ret = create_unix_socket(vsocket);
	if (ret < 0) {
		goto out_mutex;
	}

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_mutex:
	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to destroy connection mutex\n");
	}
out_free:
	vhost_user_socket_mem_free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}

/*
 * Drop any queued reconnect entry belonging to @vsocket, closing its
 * fd. Returns true when an entry was found and removed.
 */
static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	int found = false;
	struct vhost_user_reconnect *reconn, *next;

	pthread_mutex_lock(&reconn_list.mutex);

	for (reconn = TAILQ_FIRST(&reconn_list.head);
			reconn != NULL; reconn = next) {
		next = TAILQ_NEXT(reconn, next);

		if (reconn->vsocket == vsocket) {
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			close(reconn->fd);
			free(reconn);
			found = true;
			break;
		}
	}
	pthread_mutex_unlock(&reconn_list.mutex);
	return found;
}

/**
 * Unregister the specified vhost socket
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (!strcmp(vsocket->path, path)) {
again:
			pthread_mutex_lock(&vsocket->conn_mutex);
			for (conn = TAILQ_FIRST(&vsocket->conn_list);
			     conn != NULL;
			     conn = next) {
				next = TAILQ_NEXT(conn, next);

				/*
				 * If r/wcb is executing, release the
				 * conn_mutex lock, and try again since
				 * the r/wcb may use the conn_mutex lock.
				 *
				 * NOTE(review): this retry loop still holds
				 * vhost_user.mutex while spinning; verify
				 * against the read callback's lock usage
				 * that this cannot deadlock.
				 */
				if (fdset_try_del(&vhost_user.fdset,
						  conn->connfd) == -1) {
					pthread_mutex_unlock(
							&vsocket->conn_mutex);
					goto again;
				}

				RTE_LOG(INFO, VHOST_CONFIG,
					"free connfd = %d for device '%s'\n",
					conn->connfd, path);
				close(conn->connfd);
				vhost_destroy_device(conn->vid);
				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
				free(conn);
			}
			pthread_mutex_unlock(&vsocket->conn_mutex);

			if (vsocket->is_server) {
				fdset_del(&vhost_user.fdset,
						vsocket->socket_fd);
				close(vsocket->socket_fd);
				unlink(path);
			} else if (vsocket->reconnect) {
				vhost_user_remove_reconnect(vsocket);
			}

			pthread_mutex_destroy(&vsocket->conn_mutex);
			vhost_user_socket_mem_free(vsocket);

			/* Compact the registry: move the last slot here. */
			count = --vhost_user.vsocket_cnt;
			vhost_user.vsockets[i] = vhost_user.vsockets[count];
			vhost_user.vsockets[count] = NULL;
			pthread_mutex_unlock(&vhost_user.mutex);

			return 0;
		}
	}

	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}

/*
 * Register ops so that we can add/remove device to data core.
 */
int
rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	if (vsocket)
		vsocket->notify_ops = ops;
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? 0 : -1;
}

/* Fetch the ops previously registered for @path, or NULL if unknown. */
struct vhost_device_ops const *
vhost_driver_callback_get(const char *path)
{
	struct vhost_user_socket *vsocket;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	return vsocket ? vsocket->notify_ops : NULL;
}

/*
 * Start the socket at @path: lazily spawn the shared fdset dispatch
 * thread on first use, then either begin listening (server mode) or
 * connecting (client mode). Returns 0 on success, -1 on failure.
 */
int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	/* Shared dispatch thread; created once for all sockets. */
	static pthread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (fdset_tid == 0) {
		/**
		 * create a pipe which will be waited by poll and notified to
		 * rebuild the wait list of poll.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_ctrl_thread_create(&fdset_tid,
			"vhost-events", NULL, fdset_event_dispatch,
			&vhost_user.fdset);
		if (ret != 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to create fdset handling thread");

			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}