1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation 3 */ 4 5 #include <sys/types.h> 6 #include <sys/stat.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 #include <errno.h> 10 11 #include <rte_memory.h> 12 13 #include "vhost.h" 14 #include "virtio_user_dev.h" 15 #include "vhost_kernel_tap.h" 16 17 struct vhost_kernel_data { 18 int *vhostfds; 19 int *tapfds; 20 }; 21 22 struct vhost_memory_kernel { 23 uint32_t nregions; 24 uint32_t padding; 25 struct vhost_memory_region regions[0]; 26 }; 27 28 /* vhost kernel ioctls */ 29 #define VHOST_VIRTIO 0xAF 30 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 31 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 32 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 33 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 34 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 35 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 36 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 37 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 38 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 39 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 40 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 41 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 42 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 43 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 44 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 45 46 /* with below features, vhost kernel does not need to do the checksum and TSO, 47 * these info will be passed to virtio_user through virtio net header. 48 */ 49 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 50 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 51 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 52 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 53 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 54 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 55 56 /* with below features, when flows from virtio_user to vhost kernel 57 * (1) if flows goes up through the kernel networking stack, it does not need 58 * to verify checksum, which can save CPU cycles; 59 * (2) if flows goes through a Linux bridge and outside from an interface 60 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 61 * offloaded into real physical device. 62 */ 63 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 64 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 65 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 66 (1ULL << VIRTIO_NET_F_CSUM)) 67 68 static uint64_t max_regions = 64; 69 70 static void 71 get_vhost_kernel_max_regions(void) 72 { 73 int fd; 74 char buf[20] = {'\0'}; 75 76 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 77 if (fd < 0) 78 return; 79 80 if (read(fd, buf, sizeof(buf) - 1) > 0) 81 max_regions = strtoull(buf, NULL, 10); 82 83 close(fd); 84 } 85 86 static int 87 vhost_kernel_ioctl(int fd, uint64_t request, void *arg) 88 { 89 int ret; 90 91 ret = ioctl(fd, request, arg); 92 if (ret) { 93 PMD_DRV_LOG(ERR, "Vhost-kernel ioctl %"PRIu64" failed (%s)", 94 request, strerror(errno)); 95 return -1; 96 } 97 98 return 0; 99 } 100 101 static int 102 vhost_kernel_set_owner(struct virtio_user_dev *dev) 103 { 104 int ret; 105 uint32_t i; 106 struct vhost_kernel_data *data = dev->backend_data; 107 108 for (i = 0; i < dev->max_queue_pairs; ++i) { 109 if (data->vhostfds[i] < 0) 110 continue; 111 112 ret = vhost_kernel_ioctl(data->vhostfds[i], VHOST_SET_OWNER, NULL); 113 if (ret < 0) 114 return ret; 115 } 116 117 return 0; 118 } 119 120 static int 121 vhost_kernel_get_features(struct virtio_user_dev *dev, uint64_t *features) 122 { 123 int ret; 124 unsigned int tap_features; 125 struct vhost_kernel_data *data = dev->backend_data; 126 127 ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_GET_FEATURES, features); 128 if (ret < 0) { 129 PMD_DRV_LOG(ERR, "Failed to get features"); 130 return -1; 131 } 132 133 ret = tap_support_features(&tap_features); 134 if (ret < 0) { 135 PMD_DRV_LOG(ERR, "Failed to get TAP features"); 136 return -1; 137 } 138 139 /* with tap as the backend, all these features are supported 140 * but not claimed by vhost-net, so we add them back when 141 * reporting to upper layer. 142 */ 143 if (tap_features & IFF_VNET_HDR) { 144 *features |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 145 *features |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 146 } 147 148 /* vhost_kernel will not declare this feature, but it does 149 * support multi-queue. 150 */ 151 if (tap_features & IFF_MULTI_QUEUE) 152 *features |= (1ull << VIRTIO_NET_F_MQ); 153 154 return 0; 155 } 156 157 static int 158 vhost_kernel_set_features(struct virtio_user_dev *dev, uint64_t features) 159 { 160 struct vhost_kernel_data *data = dev->backend_data; 161 162 /* We don't need memory protection here */ 163 features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 164 /* VHOST kernel does not know about below flags */ 165 features &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 166 features &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 167 features &= ~(1ULL << VIRTIO_NET_F_MQ); 168 169 return vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_FEATURES, &features); 170 } 171 172 static int 173 add_memseg_list(const struct rte_memseg_list *msl, void *arg) 174 { 175 struct vhost_memory_kernel *vm = arg; 176 struct vhost_memory_region *mr; 177 void *start_addr; 178 uint64_t len; 179 180 if (msl->external) 181 return 0; 182 183 if (vm->nregions >= max_regions) 184 return -1; 185 186 start_addr = msl->base_va; 187 len = msl->page_sz * msl->memseg_arr.len; 188 189 mr = &vm->regions[vm->nregions++]; 190 191 mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr; 192 mr->userspace_addr = (uint64_t)(uintptr_t)start_addr; 193 mr->memory_size = len; 194 mr->mmap_offset = 0; /* flags_padding */ 195 196 PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64, 197 vm->nregions - 1, start_addr, len); 198 199 return 0; 200 } 201 202 /* By default, vhost kernel module allows 64 regions, but DPDK may 203 * have much more memory regions. Below function will treat each 204 * contiguous memory space reserved by DPDK as one region. 205 */ 206 static int 207 vhost_kernel_set_memory_table(struct virtio_user_dev *dev) 208 { 209 struct vhost_kernel_data *data = dev->backend_data; 210 struct vhost_memory_kernel *vm; 211 int ret; 212 213 vm = malloc(sizeof(struct vhost_memory_kernel) + 214 max_regions * 215 sizeof(struct vhost_memory_region)); 216 if (!vm) 217 goto err; 218 219 vm->nregions = 0; 220 vm->padding = 0; 221 222 /* 223 * The memory lock has already been taken by memory subsystem 224 * or virtio_user_start_device(). 225 */ 226 ret = rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm); 227 if (ret < 0) 228 goto err_free; 229 230 ret = vhost_kernel_ioctl(data->vhostfds[0], VHOST_SET_MEM_TABLE, vm); 231 if (ret < 0) 232 goto err_free; 233 234 free(vm); 235 236 return 0; 237 err_free: 238 free(vm); 239 err: 240 PMD_DRV_LOG(ERR, "Failed to set memory table"); 241 return -1; 242 } 243 244 static int 245 vhost_kernel_set_vring(struct virtio_user_dev *dev, uint64_t req, struct vhost_vring_state *state) 246 { 247 int ret, fd; 248 unsigned int index = state->index; 249 struct vhost_kernel_data *data = dev->backend_data; 250 251 /* Convert from queue index to queue-pair & offset */ 252 fd = data->vhostfds[state->index / 2]; 253 state->index %= 2; 254 255 ret = vhost_kernel_ioctl(fd, req, state); 256 if (ret < 0) { 257 PMD_DRV_LOG(ERR, "Failed to set vring (request %" PRIu64 ")", req); 258 return -1; 259 } 260 261 /* restore index back to queue index */ 262 state->index = index; 263 264 return 0; 265 } 266 267 static int 268 vhost_kernel_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state) 269 { 270 return vhost_kernel_set_vring(dev, VHOST_SET_VRING_NUM, state); 271 } 272 273 static int 274 vhost_kernel_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state) 275 { 276 return vhost_kernel_set_vring(dev, VHOST_SET_VRING_BASE, state); 277 } 278 279 static int 280 vhost_kernel_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state) 281 { 282 return vhost_kernel_set_vring(dev, VHOST_GET_VRING_BASE, state); 283 } 284 285 static int 286 vhost_kernel_set_vring_file(struct virtio_user_dev *dev, uint64_t req, 287 struct vhost_vring_file *file) 288 { 289 int ret, fd; 290 unsigned int index = file->index; 291 struct vhost_kernel_data *data = dev->backend_data; 292 293 /* Convert from queue index to queue-pair & offset */ 294 fd = data->vhostfds[file->index / 2]; 295 file->index %= 2; 296 297 ret = vhost_kernel_ioctl(fd, req, file); 298 if (ret < 0) { 299 PMD_DRV_LOG(ERR, "Failed to set vring file (request %" PRIu64 ")", req); 300 return -1; 301 } 302 303 /* restore index back to queue index */ 304 file->index = index; 305 306 return 0; 307 } 308 309 static int 310 vhost_kernel_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file) 311 { 312 return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_KICK, file); 313 } 314 315 static int 316 vhost_kernel_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file) 317 { 318 return vhost_kernel_set_vring_file(dev, VHOST_SET_VRING_CALL, file); 319 } 320 321 static int 322 vhost_kernel_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr) 323 { 324 int ret, fd; 325 unsigned int index = addr->index; 326 struct vhost_kernel_data *data = dev->backend_data; 327 328 /* Convert from queue index to queue-pair & offset */ 329 fd = data->vhostfds[addr->index / 2]; 330 addr->index %= 2; 331 332 ret = vhost_kernel_ioctl(fd, VHOST_SET_VRING_ADDR, addr); 333 if (ret < 0) { 334 PMD_DRV_LOG(ERR, "Failed to set vring address"); 335 return -1; 336 } 337 338 /* restore index back to queue index */ 339 addr->index = index; 340 341 return 0; 342 } 343 344 static int 345 vhost_kernel_get_status(struct virtio_user_dev *dev __rte_unused, uint8_t *status __rte_unused) 346 { 347 return -ENOTSUP; 348 } 349 350 static int 351 vhost_kernel_set_status(struct virtio_user_dev *dev __rte_unused, uint8_t status __rte_unused) 352 { 353 return -ENOTSUP; 354 } 355 356 /** 357 * Set up environment to talk with a vhost kernel backend. 358 * 359 * @return 360 * - (-1) if fail to set up; 361 * - (>=0) if successful. 362 */ 363 static int 364 vhost_kernel_setup(struct virtio_user_dev *dev) 365 { 366 int vhostfd; 367 uint32_t q, i; 368 struct vhost_kernel_data *data; 369 370 data = malloc(sizeof(*data)); 371 if (!data) { 372 PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost-kernel data", dev->path); 373 return -1; 374 } 375 376 data->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); 377 if (!data->vhostfds) { 378 PMD_INIT_LOG(ERR, "(%s) Failed to allocate Vhost FDs", dev->path); 379 goto err_data; 380 } 381 data->tapfds = malloc(dev->max_queue_pairs * sizeof(int)); 382 if (!data->tapfds) { 383 PMD_INIT_LOG(ERR, "(%s) Failed to allocate TAP FDs", dev->path); 384 goto err_vhostfds; 385 } 386 387 for (q = 0; q < dev->max_queue_pairs; ++q) { 388 data->vhostfds[q] = -1; 389 data->tapfds[q] = -1; 390 } 391 392 get_vhost_kernel_max_regions(); 393 394 for (i = 0; i < dev->max_queue_pairs; ++i) { 395 vhostfd = open(dev->path, O_RDWR); 396 if (vhostfd < 0) { 397 PMD_DRV_LOG(ERR, "fail to open %s, %s", dev->path, strerror(errno)); 398 goto err_tapfds; 399 } 400 401 data->vhostfds[i] = vhostfd; 402 } 403 404 dev->backend_data = data; 405 406 return 0; 407 408 err_tapfds: 409 for (i = 0; i < dev->max_queue_pairs; i++) 410 if (data->vhostfds[i] >= 0) 411 close(data->vhostfds[i]); 412 413 free(data->tapfds); 414 err_vhostfds: 415 free(data->vhostfds); 416 err_data: 417 free(data); 418 419 return -1; 420 } 421 422 static int 423 vhost_kernel_destroy(struct virtio_user_dev *dev) 424 { 425 struct vhost_kernel_data *data = dev->backend_data; 426 uint32_t i; 427 428 if (!data) 429 return 0; 430 431 for (i = 0; i < dev->max_queue_pairs; ++i) { 432 if (data->vhostfds[i] >= 0) 433 close(data->vhostfds[i]); 434 if (data->tapfds[i] >= 0) 435 close(data->tapfds[i]); 436 } 437 438 free(data->vhostfds); 439 free(data->tapfds); 440 free(data); 441 dev->backend_data = NULL; 442 443 return 0; 444 } 445 446 static int 447 vhost_kernel_set_backend(int vhostfd, int tapfd) 448 { 449 struct vhost_vring_file f; 450 451 f.fd = tapfd; 452 f.index = 0; 453 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 454 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 455 strerror(errno)); 456 return -1; 457 } 458 459 f.index = 1; 460 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 461 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 462 strerror(errno)); 463 return -1; 464 } 465 466 return 0; 467 } 468 469 static int 470 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 471 uint16_t pair_idx, 472 int enable) 473 { 474 int hdr_size; 475 int vhostfd; 476 int tapfd; 477 int req_mq = (dev->max_queue_pairs > 1); 478 struct vhost_kernel_data *data = dev->backend_data; 479 480 vhostfd = data->vhostfds[pair_idx]; 481 482 if (dev->qp_enabled[pair_idx] == enable) 483 return 0; 484 485 if (!enable) { 486 tapfd = data->tapfds[pair_idx]; 487 if (vhost_kernel_set_backend(vhostfd, -1) < 0) { 488 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 489 return -1; 490 } 491 if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) { 492 PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel"); 493 return -1; 494 } 495 dev->qp_enabled[pair_idx] = false; 496 return 0; 497 } 498 499 if (data->tapfds[pair_idx] >= 0) { 500 tapfd = data->tapfds[pair_idx]; 501 if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1) 502 return -1; 503 if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) { 504 PMD_DRV_LOG(ERR, "fail to enable tap for vhost kernel"); 505 return -1; 506 } 507 goto set_backend; 508 } 509 510 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 511 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 512 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 513 else 514 hdr_size = sizeof(struct virtio_net_hdr); 515 516 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, 517 (char *)dev->mac_addr, dev->features); 518 if (tapfd < 0) { 519 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 520 return -1; 521 } 522 523 data->tapfds[pair_idx] = tapfd; 524 525 set_backend: 526 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 527 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 528 return -1; 529 } 530 531 dev->qp_enabled[pair_idx] = true; 532 return 0; 533 } 534 535 static int 536 vhost_kernel_get_backend_features(uint64_t *features) 537 { 538 *features = 0; 539 540 return 0; 541 } 542 543 static int 544 vhost_kernel_update_link_state(struct virtio_user_dev *dev __rte_unused) 545 { 546 /* Nothing to update (Maybe get TAP interface link state?) */ 547 return 0; 548 } 549 550 static int 551 vhost_kernel_get_intr_fd(struct virtio_user_dev *dev __rte_unused) 552 { 553 /* No link state interrupt with Vhost-kernel */ 554 return -1; 555 } 556 557 struct virtio_user_backend_ops virtio_ops_kernel = { 558 .setup = vhost_kernel_setup, 559 .destroy = vhost_kernel_destroy, 560 .get_backend_features = vhost_kernel_get_backend_features, 561 .set_owner = vhost_kernel_set_owner, 562 .get_features = vhost_kernel_get_features, 563 .set_features = vhost_kernel_set_features, 564 .set_memory_table = vhost_kernel_set_memory_table, 565 .set_vring_num = vhost_kernel_set_vring_num, 566 .set_vring_base = vhost_kernel_set_vring_base, 567 .get_vring_base = vhost_kernel_get_vring_base, 568 .set_vring_call = vhost_kernel_set_vring_call, 569 .set_vring_kick = vhost_kernel_set_vring_kick, 570 .set_vring_addr = vhost_kernel_set_vring_addr, 571 .get_status = vhost_kernel_get_status, 572 .set_status = vhost_kernel_set_status, 573 .enable_qp = vhost_kernel_enable_queue_pair, 574 .update_link_state = vhost_kernel_update_link_state, 575 .get_intr_fd = vhost_kernel_get_intr_fd, 576 }; 577