1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 IGEL Co., Ltd. 3 * Copyright(c) 2016-2018 Intel Corporation 4 */ 5 #include <unistd.h> 6 #include <pthread.h> 7 #include <stdbool.h> 8 #include <sys/epoll.h> 9 10 #include <rte_mbuf.h> 11 #include <ethdev_driver.h> 12 #include <ethdev_vdev.h> 13 #include <rte_malloc.h> 14 #include <rte_memcpy.h> 15 #include <rte_bus_vdev.h> 16 #include <rte_kvargs.h> 17 #include <rte_vhost.h> 18 #include <rte_spinlock.h> 19 20 #include "rte_eth_vhost.h" 21 22 RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE); 23 24 #define VHOST_LOG(level, ...) \ 25 rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__) 26 27 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; 28 29 #define ETH_VHOST_IFACE_ARG "iface" 30 #define ETH_VHOST_QUEUES_ARG "queues" 31 #define ETH_VHOST_CLIENT_ARG "client" 32 #define ETH_VHOST_IOMMU_SUPPORT "iommu-support" 33 #define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support" 34 #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso" 35 #define ETH_VHOST_LINEAR_BUF "linear-buffer" 36 #define ETH_VHOST_EXT_BUF "ext-buffer" 37 #define VHOST_MAX_PKT_BURST 32 38 39 static const char *valid_arguments[] = { 40 ETH_VHOST_IFACE_ARG, 41 ETH_VHOST_QUEUES_ARG, 42 ETH_VHOST_CLIENT_ARG, 43 ETH_VHOST_IOMMU_SUPPORT, 44 ETH_VHOST_POSTCOPY_SUPPORT, 45 ETH_VHOST_VIRTIO_NET_F_HOST_TSO, 46 ETH_VHOST_LINEAR_BUF, 47 ETH_VHOST_EXT_BUF, 48 NULL 49 }; 50 51 static struct rte_ether_addr base_eth_addr = { 52 .addr_bytes = { 53 0x56 /* V */, 54 0x48 /* H */, 55 0x4F /* O */, 56 0x53 /* S */, 57 0x54 /* T */, 58 0x00 59 } 60 }; 61 62 enum vhost_xstats_pkts { 63 VHOST_UNDERSIZE_PKT = 0, 64 VHOST_64_PKT, 65 VHOST_65_TO_127_PKT, 66 VHOST_128_TO_255_PKT, 67 VHOST_256_TO_511_PKT, 68 VHOST_512_TO_1023_PKT, 69 VHOST_1024_TO_1522_PKT, 70 VHOST_1523_TO_MAX_PKT, 71 VHOST_BROADCAST_PKT, 72 VHOST_MULTICAST_PKT, 73 VHOST_UNICAST_PKT, 74 VHOST_PKT, 75 VHOST_BYTE, 76 VHOST_MISSED_PKT, 77 VHOST_ERRORS_PKT, 78 VHOST_ERRORS_FRAGMENTED, 79 VHOST_ERRORS_JABBER, 80 VHOST_UNKNOWN_PROTOCOL, 81 VHOST_XSTATS_MAX, 82 }; 83 84 struct vhost_stats { 85 uint64_t pkts; 86 uint64_t bytes; 87 uint64_t missed_pkts; 88 uint64_t xstats[VHOST_XSTATS_MAX]; 89 }; 90 91 struct vhost_queue { 92 int vid; 93 rte_atomic32_t allow_queuing; 94 rte_atomic32_t while_queuing; 95 struct pmd_internal *internal; 96 struct rte_mempool *mb_pool; 97 uint16_t port; 98 uint16_t virtqueue_id; 99 struct vhost_stats stats; 100 int intr_enable; 101 rte_spinlock_t intr_lock; 102 }; 103 104 struct pmd_internal { 105 rte_atomic32_t dev_attached; 106 char *iface_name; 107 uint64_t flags; 108 uint64_t disable_flags; 109 uint16_t max_queues; 110 int vid; 111 rte_atomic32_t started; 112 uint8_t vlan_strip; 113 }; 114 115 struct internal_list { 116 TAILQ_ENTRY(internal_list) next; 117 struct rte_eth_dev *eth_dev; 118 }; 119 120 TAILQ_HEAD(internal_list_head, internal_list); 121 static struct internal_list_head internal_list = 122 TAILQ_HEAD_INITIALIZER(internal_list); 123 124 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER; 125 126 static struct rte_eth_link pmd_link = { 127 .link_speed = 10000, 128 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 129 .link_status = RTE_ETH_LINK_DOWN 130 }; 131 132 struct rte_vhost_vring_state { 133 rte_spinlock_t lock; 134 135 bool cur[RTE_MAX_QUEUES_PER_PORT * 2]; 136 bool seen[RTE_MAX_QUEUES_PER_PORT * 2]; 137 unsigned int index; 138 unsigned int max_vring; 139 }; 140 141 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS]; 142 143 #define VHOST_XSTATS_NAME_SIZE 64 144 145 struct 
vhost_xstats_name_off { 146 char name[VHOST_XSTATS_NAME_SIZE]; 147 uint64_t offset; 148 }; 149 150 /* [rx]_is prepended to the name string here */ 151 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = { 152 {"good_packets", 153 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])}, 154 {"total_bytes", 155 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])}, 156 {"missed_pkts", 157 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])}, 158 {"broadcast_packets", 159 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, 160 {"multicast_packets", 161 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])}, 162 {"unicast_packets", 163 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])}, 164 {"undersize_packets", 165 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])}, 166 {"size_64_packets", 167 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])}, 168 {"size_65_to_127_packets", 169 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])}, 170 {"size_128_to_255_packets", 171 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])}, 172 {"size_256_to_511_packets", 173 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])}, 174 {"size_512_to_1023_packets", 175 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])}, 176 {"size_1024_to_1522_packets", 177 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])}, 178 {"size_1523_to_max_packets", 179 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])}, 180 {"errors_with_bad_CRC", 181 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])}, 182 {"fragmented_errors", 183 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])}, 184 {"jabber_errors", 185 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])}, 186 {"unknown_protos_packets", 187 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])}, 188 }; 189 190 /* [tx]_ is prepended to the name string here */ 191 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = { 192 {"good_packets", 193 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])}, 194 {"total_bytes", 195 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])}, 196 {"missed_pkts", 197 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])}, 198 {"broadcast_packets", 199 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, 200 {"multicast_packets", 201 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])}, 202 {"unicast_packets", 203 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])}, 204 {"undersize_packets", 205 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])}, 206 {"size_64_packets", 207 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])}, 208 {"size_65_to_127_packets", 209 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])}, 210 {"size_128_to_255_packets", 211 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])}, 212 {"size_256_to_511_packets", 213 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])}, 214 {"size_512_to_1023_packets", 215 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])}, 216 {"size_1024_to_1522_packets", 217 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])}, 218 {"size_1523_to_max_packets", 219 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])}, 220 {"errors_with_bad_CRC", 221 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])}, 222 }; 223 224 
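/*
 * Both tables above pair an xstat name with the byte offset of its counter
 * inside struct vhost_queue. vhost_dev_xstats_get() below walks them and
 * sums the per-queue counters generically, along the lines of:
 *
 *   value += *(uint64_t *)((char *)vq + vhost_rxport_stat_strings[t].offset);
 *
 * so adding a counter only needs a new enum entry plus a table row.
 */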
#define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \ 225 sizeof(vhost_rxport_stat_strings[0])) 226 227 #define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \ 228 sizeof(vhost_txport_stat_strings[0])) 229 230 static int 231 vhost_dev_xstats_reset(struct rte_eth_dev *dev) 232 { 233 struct vhost_queue *vq = NULL; 234 unsigned int i = 0; 235 236 for (i = 0; i < dev->data->nb_rx_queues; i++) { 237 vq = dev->data->rx_queues[i]; 238 if (!vq) 239 continue; 240 memset(&vq->stats, 0, sizeof(vq->stats)); 241 } 242 for (i = 0; i < dev->data->nb_tx_queues; i++) { 243 vq = dev->data->tx_queues[i]; 244 if (!vq) 245 continue; 246 memset(&vq->stats, 0, sizeof(vq->stats)); 247 } 248 249 return 0; 250 } 251 252 static int 253 vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused, 254 struct rte_eth_xstat_name *xstats_names, 255 unsigned int limit __rte_unused) 256 { 257 unsigned int t = 0; 258 int count = 0; 259 int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT; 260 261 if (!xstats_names) 262 return nstats; 263 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) { 264 snprintf(xstats_names[count].name, 265 sizeof(xstats_names[count].name), 266 "rx_%s", vhost_rxport_stat_strings[t].name); 267 count++; 268 } 269 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) { 270 snprintf(xstats_names[count].name, 271 sizeof(xstats_names[count].name), 272 "tx_%s", vhost_txport_stat_strings[t].name); 273 count++; 274 } 275 return count; 276 } 277 278 static int 279 vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 280 unsigned int n) 281 { 282 unsigned int i; 283 unsigned int t; 284 unsigned int count = 0; 285 struct vhost_queue *vq = NULL; 286 unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT; 287 288 if (n < nxstats) 289 return nxstats; 290 291 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) { 292 xstats[count].value = 0; 293 for (i = 0; i < dev->data->nb_rx_queues; i++) { 294 vq = dev->data->rx_queues[i]; 295 if (!vq) 296 continue; 297 xstats[count].value += 298 *(uint64_t *)(((char *)vq) 299 + vhost_rxport_stat_strings[t].offset); 300 } 301 xstats[count].id = count; 302 count++; 303 } 304 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) { 305 xstats[count].value = 0; 306 for (i = 0; i < dev->data->nb_tx_queues; i++) { 307 vq = dev->data->tx_queues[i]; 308 if (!vq) 309 continue; 310 xstats[count].value += 311 *(uint64_t *)(((char *)vq) 312 + vhost_txport_stat_strings[t].offset); 313 } 314 xstats[count].id = count; 315 count++; 316 } 317 return count; 318 } 319 320 static inline void 321 vhost_count_xcast_packets(struct vhost_queue *vq, 322 struct rte_mbuf *mbuf) 323 { 324 struct rte_ether_addr *ea = NULL; 325 struct vhost_stats *pstats = &vq->stats; 326 327 ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *); 328 if (rte_is_multicast_ether_addr(ea)) { 329 if (rte_is_broadcast_ether_addr(ea)) 330 pstats->xstats[VHOST_BROADCAST_PKT]++; 331 else 332 pstats->xstats[VHOST_MULTICAST_PKT]++; 333 } else { 334 pstats->xstats[VHOST_UNICAST_PKT]++; 335 } 336 } 337 338 static __rte_always_inline void 339 vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf) 340 { 341 uint32_t pkt_len = 0; 342 uint64_t index; 343 struct vhost_stats *pstats = &vq->stats; 344 345 pstats->xstats[VHOST_PKT]++; 346 pkt_len = buf->pkt_len; 347 if (pkt_len == 64) { 348 pstats->xstats[VHOST_64_PKT]++; 349 } else if (pkt_len > 64 && pkt_len < 1024) { 350 index = (sizeof(pkt_len) * 8) 351 - __builtin_clz(pkt_len) - 5; 352 pstats->xstats[index]++; 353 } 
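	/*
	 * The branch above turns the position of pkt_len's most significant
	 * bit into a size-bucket index: lengths 65..1023 land on
	 * VHOST_65_TO_127_PKT .. VHOST_512_TO_1023_PKT. For example,
	 * pkt_len == 128 gives 32 - __builtin_clz(128) - 5 = 8 - 5 = 3,
	 * i.e. VHOST_128_TO_255_PKT.
	 */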
else { 354 if (pkt_len < 64) 355 pstats->xstats[VHOST_UNDERSIZE_PKT]++; 356 else if (pkt_len <= 1522) 357 pstats->xstats[VHOST_1024_TO_1522_PKT]++; 358 else if (pkt_len > 1522) 359 pstats->xstats[VHOST_1523_TO_MAX_PKT]++; 360 } 361 vhost_count_xcast_packets(vq, buf); 362 } 363 364 static uint16_t 365 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 366 { 367 struct vhost_queue *r = q; 368 uint16_t i, nb_rx = 0; 369 uint16_t nb_receive = nb_bufs; 370 371 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 372 return 0; 373 374 rte_atomic32_set(&r->while_queuing, 1); 375 376 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 377 goto out; 378 379 /* Dequeue packets from guest TX queue */ 380 while (nb_receive) { 381 uint16_t nb_pkts; 382 uint16_t num = (uint16_t)RTE_MIN(nb_receive, 383 VHOST_MAX_PKT_BURST); 384 385 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id, 386 r->mb_pool, &bufs[nb_rx], 387 num); 388 389 nb_rx += nb_pkts; 390 nb_receive -= nb_pkts; 391 if (nb_pkts < num) 392 break; 393 } 394 395 r->stats.pkts += nb_rx; 396 397 for (i = 0; likely(i < nb_rx); i++) { 398 bufs[i]->port = r->port; 399 bufs[i]->vlan_tci = 0; 400 401 if (r->internal->vlan_strip) 402 rte_vlan_strip(bufs[i]); 403 404 r->stats.bytes += bufs[i]->pkt_len; 405 r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len; 406 407 vhost_update_single_packet_xstats(r, bufs[i]); 408 } 409 410 out: 411 rte_atomic32_set(&r->while_queuing, 0); 412 413 return nb_rx; 414 } 415 416 static uint16_t 417 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) 418 { 419 struct vhost_queue *r = q; 420 uint16_t i, nb_tx = 0; 421 uint16_t nb_send = 0; 422 uint64_t nb_bytes = 0; 423 uint64_t nb_missed = 0; 424 425 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 426 return 0; 427 428 rte_atomic32_set(&r->while_queuing, 1); 429 430 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) 431 goto out; 432 433 for (i = 0; i < nb_bufs; i++) { 434 struct rte_mbuf *m = bufs[i]; 435 436 /* Do VLAN tag insertion */ 437 if (m->ol_flags & RTE_MBUF_F_TX_VLAN) { 438 int error = rte_vlan_insert(&m); 439 if (unlikely(error)) { 440 rte_pktmbuf_free(m); 441 continue; 442 } 443 } 444 445 bufs[nb_send] = m; 446 ++nb_send; 447 } 448 449 /* Enqueue packets to guest RX queue */ 450 while (nb_send) { 451 uint16_t nb_pkts; 452 uint16_t num = (uint16_t)RTE_MIN(nb_send, 453 VHOST_MAX_PKT_BURST); 454 455 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id, 456 &bufs[nb_tx], num); 457 458 nb_tx += nb_pkts; 459 nb_send -= nb_pkts; 460 if (nb_pkts < num) 461 break; 462 } 463 464 for (i = 0; likely(i < nb_tx); i++) { 465 nb_bytes += bufs[i]->pkt_len; 466 vhost_update_single_packet_xstats(r, bufs[i]); 467 } 468 469 nb_missed = nb_bufs - nb_tx; 470 471 r->stats.pkts += nb_tx; 472 r->stats.bytes += nb_bytes; 473 r->stats.missed_pkts += nb_missed; 474 475 r->stats.xstats[VHOST_BYTE] += nb_bytes; 476 r->stats.xstats[VHOST_MISSED_PKT] += nb_missed; 477 r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed; 478 479 /* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and 480 * ifHCOutBroadcastPkts counters are increased when packets are not 481 * transmitted successfully. 
482 */ 483 for (i = nb_tx; i < nb_bufs; i++) 484 vhost_count_xcast_packets(r, bufs[i]); 485 486 for (i = 0; likely(i < nb_tx); i++) 487 rte_pktmbuf_free(bufs[i]); 488 out: 489 rte_atomic32_set(&r->while_queuing, 0); 490 491 return nb_tx; 492 } 493 494 static inline struct internal_list * 495 find_internal_resource(char *ifname) 496 { 497 int found = 0; 498 struct internal_list *list; 499 struct pmd_internal *internal; 500 501 if (ifname == NULL) 502 return NULL; 503 504 pthread_mutex_lock(&internal_list_lock); 505 506 TAILQ_FOREACH(list, &internal_list, next) { 507 internal = list->eth_dev->data->dev_private; 508 if (!strcmp(internal->iface_name, ifname)) { 509 found = 1; 510 break; 511 } 512 } 513 514 pthread_mutex_unlock(&internal_list_lock); 515 516 if (!found) 517 return NULL; 518 519 return list; 520 } 521 522 static int 523 eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx) 524 { 525 struct rte_intr_handle *handle = eth_dev->intr_handle; 526 struct rte_epoll_event rev, *elist; 527 int epfd, ret; 528 529 if (handle == NULL) 530 return 0; 531 532 elist = rte_intr_elist_index_get(handle, rxq_idx); 533 if (rte_intr_efds_index_get(handle, rxq_idx) == elist->fd) 534 return 0; 535 536 VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n", 537 rxq_idx); 538 539 if (elist->fd != -1) 540 VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n", 541 elist->fd); 542 543 /* 544 * First remove invalid epoll event, and then install 545 * the new one. May be solved with a proper API in the 546 * future. 547 */ 548 epfd = elist->epfd; 549 rev = *elist; 550 ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd, 551 elist); 552 if (ret) { 553 VHOST_LOG(ERR, "Delete epoll event failed.\n"); 554 return ret; 555 } 556 557 rev.fd = rte_intr_efds_index_get(handle, rxq_idx); 558 if (rte_intr_elist_index_set(handle, rxq_idx, rev)) 559 return -rte_errno; 560 561 elist = rte_intr_elist_index_get(handle, rxq_idx); 562 ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, elist); 563 if (ret) { 564 VHOST_LOG(ERR, "Add epoll event failed.\n"); 565 return ret; 566 } 567 568 return 0; 569 } 570 571 static int 572 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) 573 { 574 struct rte_vhost_vring vring; 575 struct vhost_queue *vq; 576 int old_intr_enable, ret = 0; 577 578 vq = dev->data->rx_queues[qid]; 579 if (!vq) { 580 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); 581 return -1; 582 } 583 584 rte_spinlock_lock(&vq->intr_lock); 585 old_intr_enable = vq->intr_enable; 586 vq->intr_enable = 1; 587 ret = eth_vhost_update_intr(dev, qid); 588 rte_spinlock_unlock(&vq->intr_lock); 589 590 if (ret < 0) { 591 VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid); 592 vq->intr_enable = old_intr_enable; 593 return ret; 594 } 595 596 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); 597 if (ret < 0) { 598 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid); 599 return ret; 600 } 601 VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid); 602 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1); 603 rte_wmb(); 604 605 return ret; 606 } 607 608 static int 609 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid) 610 { 611 struct rte_vhost_vring vring; 612 struct vhost_queue *vq; 613 int ret = 0; 614 615 vq = dev->data->rx_queues[qid]; 616 if (!vq) { 617 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); 618 return -1; 619 } 620 621 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); 622 if (ret < 0) { 623 VHOST_LOG(ERR, "Failed to get 
rxq%d's vring", qid); 624 return ret; 625 } 626 VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid); 627 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0); 628 rte_wmb(); 629 630 vq->intr_enable = 0; 631 632 return 0; 633 } 634 635 static void 636 eth_vhost_uninstall_intr(struct rte_eth_dev *dev) 637 { 638 struct rte_intr_handle *intr_handle = dev->intr_handle; 639 640 if (intr_handle != NULL) { 641 rte_intr_vec_list_free(intr_handle); 642 rte_intr_instance_free(intr_handle); 643 } 644 dev->intr_handle = NULL; 645 } 646 647 static int 648 eth_vhost_install_intr(struct rte_eth_dev *dev) 649 { 650 struct rte_vhost_vring vring; 651 struct vhost_queue *vq; 652 int nb_rxq = dev->data->nb_rx_queues; 653 int i; 654 int ret; 655 656 /* uninstall firstly if we are reconnecting */ 657 if (dev->intr_handle != NULL) 658 eth_vhost_uninstall_intr(dev); 659 660 dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE); 661 if (dev->intr_handle == NULL) { 662 VHOST_LOG(ERR, "Fail to allocate intr_handle\n"); 663 return -ENOMEM; 664 } 665 if (rte_intr_efd_counter_size_set(dev->intr_handle, sizeof(uint64_t))) 666 return -rte_errno; 667 668 if (rte_intr_vec_list_alloc(dev->intr_handle, NULL, nb_rxq)) { 669 VHOST_LOG(ERR, 670 "Failed to allocate memory for interrupt vector\n"); 671 rte_intr_instance_free(dev->intr_handle); 672 return -ENOMEM; 673 } 674 675 676 VHOST_LOG(INFO, "Prepare intr vec\n"); 677 for (i = 0; i < nb_rxq; i++) { 678 if (rte_intr_vec_list_index_set(dev->intr_handle, i, RTE_INTR_VEC_RXTX_OFFSET + i)) 679 return -rte_errno; 680 if (rte_intr_efds_index_set(dev->intr_handle, i, -1)) 681 return -rte_errno; 682 vq = dev->data->rx_queues[i]; 683 if (!vq) { 684 VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i); 685 continue; 686 } 687 688 ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring); 689 if (ret < 0) { 690 VHOST_LOG(INFO, 691 "Failed to get rxq-%d's vring, skip!\n", i); 692 continue; 693 } 694 695 if (vring.kickfd < 0) { 696 VHOST_LOG(INFO, 697 "rxq-%d's kickfd is invalid, skip!\n", i); 698 continue; 699 } 700 701 if (rte_intr_efds_index_set(dev->intr_handle, i, vring.kickfd)) 702 continue; 703 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i); 704 } 705 706 if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq)) 707 return -rte_errno; 708 709 if (rte_intr_max_intr_set(dev->intr_handle, nb_rxq + 1)) 710 return -rte_errno; 711 712 if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VDEV)) 713 return -rte_errno; 714 715 return 0; 716 } 717 718 static void 719 update_queuing_status(struct rte_eth_dev *dev) 720 { 721 struct pmd_internal *internal = dev->data->dev_private; 722 struct vhost_queue *vq; 723 unsigned int i; 724 int allow_queuing = 1; 725 726 if (!dev->data->rx_queues || !dev->data->tx_queues) 727 return; 728 729 if (rte_atomic32_read(&internal->started) == 0 || 730 rte_atomic32_read(&internal->dev_attached) == 0) 731 allow_queuing = 0; 732 733 /* Wait until rx/tx_pkt_burst stops accessing vhost device */ 734 for (i = 0; i < dev->data->nb_rx_queues; i++) { 735 vq = dev->data->rx_queues[i]; 736 if (vq == NULL) 737 continue; 738 rte_atomic32_set(&vq->allow_queuing, allow_queuing); 739 while (rte_atomic32_read(&vq->while_queuing)) 740 rte_pause(); 741 } 742 743 for (i = 0; i < dev->data->nb_tx_queues; i++) { 744 vq = dev->data->tx_queues[i]; 745 if (vq == NULL) 746 continue; 747 rte_atomic32_set(&vq->allow_queuing, allow_queuing); 748 while (rte_atomic32_read(&vq->while_queuing)) 749 rte_pause(); 750 } 751 } 752 753 static void 754 
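/*
 * Point every configured Rx/Tx queue at the current vhost device; the burst
 * functions read vq->vid, vq->internal and vq->port set here.
 */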
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

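/*
 * Counterpart of new_device(): invoked by the vhost library when the
 * vhost-user connection goes away. Queuing is stopped through
 * update_queuing_status() before the queues are detached from the vid,
 * so the burst functions never dereference a stale device id.
 */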
static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev);

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
{
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct vhost_queue *vq;
	struct rte_vhost_vring vring;
	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
	int ret = 0;

	/*
	 * The vring kickfd may be changed after the new device notification.
	 * Update it when the vring state is updated.
	 */
	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
	    rte_atomic32_read(&internal->dev_attached) &&
	    rte_atomic32_read(&internal->started) &&
	    dev_conf->intr_conf.rxq) {
		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
		if (ret) {
			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
					vring_id);
			return ret;
		}

		if (rte_intr_efds_index_set(eth_dev->intr_handle, rx_idx,
					    vring.kickfd))
			return -rte_errno;

		vq = eth_dev->data->rx_queues[rx_idx];
		if (!vq) {
			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
			return -1;
		}

		rte_spinlock_lock(&vq->intr_lock);
		if (vq->intr_enable)
			ret = eth_vhost_update_intr(eth_dev, rx_idx);
		rte_spinlock_unlock(&vq->intr_lock);
	}

	return ret;
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	/* won't be NULL */
	state = vring_states[eth_dev->data->port_id];

	if (enable && vring_conf_update(vid, eth_dev, vring))
		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
			  (int)vring);

	rte_spinlock_lock(&state->lock);
	if (state->cur[vring] == enable) {
		rte_spinlock_unlock(&state->lock);
		return 0;
	}
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "vring%u is %s\n",
			vring, enable ? "enabled" : "disabled");

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct rte_vhost_device_ops vhost_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = vring_state_changed,
};

static int
vhost_driver_setup(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct internal_list *list = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	unsigned int numa_node = eth_dev->device->numa_node;
	const char *name = eth_dev->device->name;

	/* Don't try to setup again if it has already been done.
*/ 994 list = find_internal_resource(internal->iface_name); 995 if (list) 996 return 0; 997 998 list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); 999 if (list == NULL) 1000 return -1; 1001 1002 vring_state = rte_zmalloc_socket(name, sizeof(*vring_state), 1003 0, numa_node); 1004 if (vring_state == NULL) 1005 goto free_list; 1006 1007 list->eth_dev = eth_dev; 1008 pthread_mutex_lock(&internal_list_lock); 1009 TAILQ_INSERT_TAIL(&internal_list, list, next); 1010 pthread_mutex_unlock(&internal_list_lock); 1011 1012 rte_spinlock_init(&vring_state->lock); 1013 vring_states[eth_dev->data->port_id] = vring_state; 1014 1015 if (rte_vhost_driver_register(internal->iface_name, internal->flags)) 1016 goto list_remove; 1017 1018 if (internal->disable_flags) { 1019 if (rte_vhost_driver_disable_features(internal->iface_name, 1020 internal->disable_flags)) 1021 goto drv_unreg; 1022 } 1023 1024 if (rte_vhost_driver_callback_register(internal->iface_name, 1025 &vhost_ops) < 0) { 1026 VHOST_LOG(ERR, "Can't register callbacks\n"); 1027 goto drv_unreg; 1028 } 1029 1030 if (rte_vhost_driver_start(internal->iface_name) < 0) { 1031 VHOST_LOG(ERR, "Failed to start driver for %s\n", 1032 internal->iface_name); 1033 goto drv_unreg; 1034 } 1035 1036 return 0; 1037 1038 drv_unreg: 1039 rte_vhost_driver_unregister(internal->iface_name); 1040 list_remove: 1041 vring_states[eth_dev->data->port_id] = NULL; 1042 pthread_mutex_lock(&internal_list_lock); 1043 TAILQ_REMOVE(&internal_list, list, next); 1044 pthread_mutex_unlock(&internal_list_lock); 1045 rte_free(vring_state); 1046 free_list: 1047 rte_free(list); 1048 1049 return -1; 1050 } 1051 1052 int 1053 rte_eth_vhost_get_queue_event(uint16_t port_id, 1054 struct rte_eth_vhost_queue_event *event) 1055 { 1056 struct rte_vhost_vring_state *state; 1057 unsigned int i; 1058 int idx; 1059 1060 if (port_id >= RTE_MAX_ETHPORTS) { 1061 VHOST_LOG(ERR, "Invalid port id\n"); 1062 return -1; 1063 } 1064 1065 state = vring_states[port_id]; 1066 if (!state) { 1067 VHOST_LOG(ERR, "Unused port\n"); 1068 return -1; 1069 } 1070 1071 rte_spinlock_lock(&state->lock); 1072 for (i = 0; i <= state->max_vring; i++) { 1073 idx = state->index++ % (state->max_vring + 1); 1074 1075 if (state->cur[idx] != state->seen[idx]) { 1076 state->seen[idx] = state->cur[idx]; 1077 event->queue_id = idx / 2; 1078 event->rx = idx & 1; 1079 event->enable = state->cur[idx]; 1080 rte_spinlock_unlock(&state->lock); 1081 return 0; 1082 } 1083 } 1084 rte_spinlock_unlock(&state->lock); 1085 1086 return -1; 1087 } 1088 1089 int 1090 rte_eth_vhost_get_vid_from_port_id(uint16_t port_id) 1091 { 1092 struct internal_list *list; 1093 struct rte_eth_dev *eth_dev; 1094 struct vhost_queue *vq; 1095 int vid = -1; 1096 1097 if (!rte_eth_dev_is_valid_port(port_id)) 1098 return -1; 1099 1100 pthread_mutex_lock(&internal_list_lock); 1101 1102 TAILQ_FOREACH(list, &internal_list, next) { 1103 eth_dev = list->eth_dev; 1104 if (eth_dev->data->port_id == port_id) { 1105 vq = eth_dev->data->rx_queues[0]; 1106 if (vq) { 1107 vid = vq->vid; 1108 } 1109 break; 1110 } 1111 } 1112 1113 pthread_mutex_unlock(&internal_list_lock); 1114 1115 return vid; 1116 } 1117 1118 static int 1119 eth_dev_configure(struct rte_eth_dev *dev) 1120 { 1121 struct pmd_internal *internal = dev->data->dev_private; 1122 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 1123 1124 /* NOTE: the same process has to operate a vhost interface 1125 * from beginning to end (from eth_dev configure to eth_dev close). 
	 * It is user's responsibility at the moment.
	 */
	if (vhost_driver_setup(dev) < 0)
		return -1;

	internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev);

	return 0;
}

static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	dev->data->dev_started = 0;
	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev);

	return 0;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i, ret;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	internal = dev->data->dev_private;
	if (!internal)
		return 0;

	ret = eth_dev_stop(dev);

	list = find_internal_resource(internal->iface_name);
	if (list) {
		rte_vhost_driver_unregister(internal->iface_name);
		pthread_mutex_lock(&internal_list_lock);
		TAILQ_REMOVE(&internal_list, list, next);
		pthread_mutex_unlock(&internal_list_lock);
		rte_free(list);
	}

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	rte_free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;

	rte_free(vring_states[dev->data->port_id]);
	vring_states[dev->data->port_id] = NULL;

	return ret;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
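	/*
	 * Host Tx queue N maps onto the guest's Rx vring (2 * N + VIRTIO_RXQ);
	 * eth_rx_queue_setup() above symmetrically uses 2 * N + VIRTIO_TXQ,
	 * i.e. the guest's Tx vring.
	 */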
rte_spinlock_init(&vq->intr_lock); 1257 dev->data->tx_queues[tx_queue_id] = vq; 1258 1259 return 0; 1260 } 1261 1262 static int 1263 eth_dev_info(struct rte_eth_dev *dev, 1264 struct rte_eth_dev_info *dev_info) 1265 { 1266 struct pmd_internal *internal; 1267 1268 internal = dev->data->dev_private; 1269 if (internal == NULL) { 1270 VHOST_LOG(ERR, "Invalid device specified\n"); 1271 return -ENODEV; 1272 } 1273 1274 dev_info->max_mac_addrs = 1; 1275 dev_info->max_rx_pktlen = (uint32_t)-1; 1276 dev_info->max_rx_queues = internal->max_queues; 1277 dev_info->max_tx_queues = internal->max_queues; 1278 dev_info->min_rx_bufsize = 0; 1279 1280 dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | 1281 RTE_ETH_TX_OFFLOAD_VLAN_INSERT; 1282 dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP; 1283 1284 return 0; 1285 } 1286 1287 static int 1288 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 1289 { 1290 unsigned i; 1291 unsigned long rx_total = 0, tx_total = 0; 1292 unsigned long rx_total_bytes = 0, tx_total_bytes = 0; 1293 struct vhost_queue *vq; 1294 1295 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1296 i < dev->data->nb_rx_queues; i++) { 1297 if (dev->data->rx_queues[i] == NULL) 1298 continue; 1299 vq = dev->data->rx_queues[i]; 1300 stats->q_ipackets[i] = vq->stats.pkts; 1301 rx_total += stats->q_ipackets[i]; 1302 1303 stats->q_ibytes[i] = vq->stats.bytes; 1304 rx_total_bytes += stats->q_ibytes[i]; 1305 } 1306 1307 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && 1308 i < dev->data->nb_tx_queues; i++) { 1309 if (dev->data->tx_queues[i] == NULL) 1310 continue; 1311 vq = dev->data->tx_queues[i]; 1312 stats->q_opackets[i] = vq->stats.pkts; 1313 tx_total += stats->q_opackets[i]; 1314 1315 stats->q_obytes[i] = vq->stats.bytes; 1316 tx_total_bytes += stats->q_obytes[i]; 1317 } 1318 1319 stats->ipackets = rx_total; 1320 stats->opackets = tx_total; 1321 stats->ibytes = rx_total_bytes; 1322 stats->obytes = tx_total_bytes; 1323 1324 return 0; 1325 } 1326 1327 static int 1328 eth_stats_reset(struct rte_eth_dev *dev) 1329 { 1330 struct vhost_queue *vq; 1331 unsigned i; 1332 1333 for (i = 0; i < dev->data->nb_rx_queues; i++) { 1334 if (dev->data->rx_queues[i] == NULL) 1335 continue; 1336 vq = dev->data->rx_queues[i]; 1337 vq->stats.pkts = 0; 1338 vq->stats.bytes = 0; 1339 } 1340 for (i = 0; i < dev->data->nb_tx_queues; i++) { 1341 if (dev->data->tx_queues[i] == NULL) 1342 continue; 1343 vq = dev->data->tx_queues[i]; 1344 vq->stats.pkts = 0; 1345 vq->stats.bytes = 0; 1346 vq->stats.missed_pkts = 0; 1347 } 1348 1349 return 0; 1350 } 1351 1352 static void 1353 eth_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1354 { 1355 rte_free(dev->data->rx_queues[qid]); 1356 } 1357 1358 static void 1359 eth_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1360 { 1361 rte_free(dev->data->tx_queues[qid]); 1362 } 1363 1364 static int 1365 eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused) 1366 { 1367 /* 1368 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data 1369 * and releases mbuf, so nothing to cleanup. 
1370 */ 1371 return 0; 1372 } 1373 1374 static int 1375 eth_link_update(struct rte_eth_dev *dev __rte_unused, 1376 int wait_to_complete __rte_unused) 1377 { 1378 return 0; 1379 } 1380 1381 static uint32_t 1382 eth_rx_queue_count(void *rx_queue) 1383 { 1384 struct vhost_queue *vq; 1385 1386 vq = rx_queue; 1387 if (vq == NULL) 1388 return 0; 1389 1390 return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id); 1391 } 1392 1393 #define CLB_VAL_IDX 0 1394 #define CLB_MSK_IDX 1 1395 #define CLB_MATCH_IDX 2 1396 static int 1397 vhost_monitor_callback(const uint64_t value, 1398 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) 1399 { 1400 const uint64_t m = opaque[CLB_MSK_IDX]; 1401 const uint64_t v = opaque[CLB_VAL_IDX]; 1402 const uint64_t c = opaque[CLB_MATCH_IDX]; 1403 1404 if (c) 1405 return (value & m) == v ? -1 : 0; 1406 else 1407 return (value & m) == v ? 0 : -1; 1408 } 1409 1410 static int 1411 vhost_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) 1412 { 1413 struct vhost_queue *vq = rx_queue; 1414 struct rte_vhost_power_monitor_cond vhost_pmc; 1415 int ret; 1416 if (vq == NULL) 1417 return -EINVAL; 1418 ret = rte_vhost_get_monitor_addr(vq->vid, vq->virtqueue_id, 1419 &vhost_pmc); 1420 if (ret < 0) 1421 return -EINVAL; 1422 pmc->addr = vhost_pmc.addr; 1423 pmc->opaque[CLB_VAL_IDX] = vhost_pmc.val; 1424 pmc->opaque[CLB_MSK_IDX] = vhost_pmc.mask; 1425 pmc->opaque[CLB_MATCH_IDX] = vhost_pmc.match; 1426 pmc->size = vhost_pmc.size; 1427 pmc->fn = vhost_monitor_callback; 1428 1429 return 0; 1430 } 1431 1432 static const struct eth_dev_ops ops = { 1433 .dev_start = eth_dev_start, 1434 .dev_stop = eth_dev_stop, 1435 .dev_close = eth_dev_close, 1436 .dev_configure = eth_dev_configure, 1437 .dev_infos_get = eth_dev_info, 1438 .rx_queue_setup = eth_rx_queue_setup, 1439 .tx_queue_setup = eth_tx_queue_setup, 1440 .rx_queue_release = eth_rx_queue_release, 1441 .tx_queue_release = eth_tx_queue_release, 1442 .tx_done_cleanup = eth_tx_done_cleanup, 1443 .link_update = eth_link_update, 1444 .stats_get = eth_stats_get, 1445 .stats_reset = eth_stats_reset, 1446 .xstats_reset = vhost_dev_xstats_reset, 1447 .xstats_get = vhost_dev_xstats_get, 1448 .xstats_get_names = vhost_dev_xstats_get_names, 1449 .rx_queue_intr_enable = eth_rxq_intr_enable, 1450 .rx_queue_intr_disable = eth_rxq_intr_disable, 1451 .get_monitor_addr = vhost_get_monitor_addr, 1452 }; 1453 1454 static int 1455 eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, 1456 int16_t queues, const unsigned int numa_node, uint64_t flags, 1457 uint64_t disable_flags) 1458 { 1459 const char *name = rte_vdev_device_name(dev); 1460 struct rte_eth_dev_data *data; 1461 struct pmd_internal *internal = NULL; 1462 struct rte_eth_dev *eth_dev = NULL; 1463 struct rte_ether_addr *eth_addr = NULL; 1464 1465 VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n", 1466 numa_node); 1467 1468 /* reserve an ethdev entry */ 1469 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal)); 1470 if (eth_dev == NULL) 1471 goto error; 1472 data = eth_dev->data; 1473 1474 eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node); 1475 if (eth_addr == NULL) 1476 goto error; 1477 data->mac_addrs = eth_addr; 1478 *eth_addr = base_eth_addr; 1479 eth_addr->addr_bytes[5] = eth_dev->data->port_id; 1480 1481 /* now put it all together 1482 * - store queue data in internal, 1483 * - point eth_dev_data to internals 1484 * - and point eth_dev structure to new eth_dev_data structure 1485 */ 1486 internal = eth_dev->data->dev_private; 
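	/*
	 * Keep a private copy of the socket path: iface_name points into the
	 * caller's kvargs, which are freed once probing completes.
	 */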
1487 internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1, 1488 0, numa_node); 1489 if (internal->iface_name == NULL) 1490 goto error; 1491 strcpy(internal->iface_name, iface_name); 1492 1493 data->nb_rx_queues = queues; 1494 data->nb_tx_queues = queues; 1495 internal->max_queues = queues; 1496 internal->vid = -1; 1497 internal->flags = flags; 1498 internal->disable_flags = disable_flags; 1499 data->dev_link = pmd_link; 1500 data->dev_flags = RTE_ETH_DEV_INTR_LSC | 1501 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1502 data->promiscuous = 1; 1503 data->all_multicast = 1; 1504 1505 eth_dev->dev_ops = &ops; 1506 eth_dev->rx_queue_count = eth_rx_queue_count; 1507 1508 /* finally assign rx and tx ops */ 1509 eth_dev->rx_pkt_burst = eth_vhost_rx; 1510 eth_dev->tx_pkt_burst = eth_vhost_tx; 1511 1512 rte_eth_dev_probing_finish(eth_dev); 1513 return 0; 1514 1515 error: 1516 if (internal) 1517 rte_free(internal->iface_name); 1518 rte_eth_dev_release_port(eth_dev); 1519 1520 return -1; 1521 } 1522 1523 static inline int 1524 open_iface(const char *key __rte_unused, const char *value, void *extra_args) 1525 { 1526 const char **iface_name = extra_args; 1527 1528 if (value == NULL) 1529 return -1; 1530 1531 *iface_name = value; 1532 1533 return 0; 1534 } 1535 1536 static inline int 1537 open_int(const char *key __rte_unused, const char *value, void *extra_args) 1538 { 1539 uint16_t *n = extra_args; 1540 1541 if (value == NULL || extra_args == NULL) 1542 return -EINVAL; 1543 1544 *n = (uint16_t)strtoul(value, NULL, 0); 1545 if (*n == USHRT_MAX && errno == ERANGE) 1546 return -1; 1547 1548 return 0; 1549 } 1550 1551 static int 1552 rte_pmd_vhost_probe(struct rte_vdev_device *dev) 1553 { 1554 struct rte_kvargs *kvlist = NULL; 1555 int ret = 0; 1556 char *iface_name; 1557 uint16_t queues; 1558 uint64_t flags = 0; 1559 uint64_t disable_flags = 0; 1560 int client_mode = 0; 1561 int iommu_support = 0; 1562 int postcopy_support = 0; 1563 int tso = 0; 1564 int linear_buf = 0; 1565 int ext_buf = 0; 1566 struct rte_eth_dev *eth_dev; 1567 const char *name = rte_vdev_device_name(dev); 1568 1569 VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name); 1570 1571 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1572 eth_dev = rte_eth_dev_attach_secondary(name); 1573 if (!eth_dev) { 1574 VHOST_LOG(ERR, "Failed to probe %s\n", name); 1575 return -1; 1576 } 1577 eth_dev->rx_pkt_burst = eth_vhost_rx; 1578 eth_dev->tx_pkt_burst = eth_vhost_tx; 1579 eth_dev->dev_ops = &ops; 1580 if (dev->device.numa_node == SOCKET_ID_ANY) 1581 dev->device.numa_node = rte_socket_id(); 1582 eth_dev->device = &dev->device; 1583 rte_eth_dev_probing_finish(eth_dev); 1584 return 0; 1585 } 1586 1587 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); 1588 if (kvlist == NULL) 1589 return -1; 1590 1591 if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) { 1592 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG, 1593 &open_iface, &iface_name); 1594 if (ret < 0) 1595 goto out_free; 1596 } else { 1597 ret = -1; 1598 goto out_free; 1599 } 1600 1601 if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) { 1602 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG, 1603 &open_int, &queues); 1604 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT) 1605 goto out_free; 1606 1607 } else 1608 queues = 1; 1609 1610 if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) { 1611 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG, 1612 &open_int, &client_mode); 1613 if (ret < 0) 1614 goto out_free; 1615 1616 if (client_mode) 
1617 flags |= RTE_VHOST_USER_CLIENT; 1618 } 1619 1620 if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) { 1621 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT, 1622 &open_int, &iommu_support); 1623 if (ret < 0) 1624 goto out_free; 1625 1626 if (iommu_support) 1627 flags |= RTE_VHOST_USER_IOMMU_SUPPORT; 1628 } 1629 1630 if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) { 1631 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT, 1632 &open_int, &postcopy_support); 1633 if (ret < 0) 1634 goto out_free; 1635 1636 if (postcopy_support) 1637 flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT; 1638 } 1639 1640 if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) { 1641 ret = rte_kvargs_process(kvlist, 1642 ETH_VHOST_VIRTIO_NET_F_HOST_TSO, 1643 &open_int, &tso); 1644 if (ret < 0) 1645 goto out_free; 1646 1647 if (tso == 0) { 1648 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4); 1649 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6); 1650 } 1651 } 1652 1653 if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) { 1654 ret = rte_kvargs_process(kvlist, 1655 ETH_VHOST_LINEAR_BUF, 1656 &open_int, &linear_buf); 1657 if (ret < 0) 1658 goto out_free; 1659 1660 if (linear_buf == 1) 1661 flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT; 1662 } 1663 1664 if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) { 1665 ret = rte_kvargs_process(kvlist, 1666 ETH_VHOST_EXT_BUF, 1667 &open_int, &ext_buf); 1668 if (ret < 0) 1669 goto out_free; 1670 1671 if (ext_buf == 1) 1672 flags |= RTE_VHOST_USER_EXTBUF_SUPPORT; 1673 } 1674 1675 if (dev->device.numa_node == SOCKET_ID_ANY) 1676 dev->device.numa_node = rte_socket_id(); 1677 1678 ret = eth_dev_vhost_create(dev, iface_name, queues, 1679 dev->device.numa_node, flags, disable_flags); 1680 if (ret == -1) 1681 VHOST_LOG(ERR, "Failed to create %s\n", name); 1682 1683 out_free: 1684 rte_kvargs_free(kvlist); 1685 return ret; 1686 } 1687 1688 static int 1689 rte_pmd_vhost_remove(struct rte_vdev_device *dev) 1690 { 1691 const char *name; 1692 struct rte_eth_dev *eth_dev = NULL; 1693 1694 name = rte_vdev_device_name(dev); 1695 VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name); 1696 1697 /* find an ethdev entry */ 1698 eth_dev = rte_eth_dev_allocated(name); 1699 if (eth_dev == NULL) 1700 return 0; 1701 1702 eth_dev_close(eth_dev); 1703 rte_eth_dev_release_port(eth_dev); 1704 1705 return 0; 1706 } 1707 1708 static struct rte_vdev_driver pmd_vhost_drv = { 1709 .probe = rte_pmd_vhost_probe, 1710 .remove = rte_pmd_vhost_remove, 1711 }; 1712 1713 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv); 1714 RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost); 1715 RTE_PMD_REGISTER_PARAM_STRING(net_vhost, 1716 "iface=<ifc> " 1717 "queues=<int> " 1718 "client=<0|1> " 1719 "iommu-support=<0|1> " 1720 "postcopy-support=<0|1> " 1721 "tso=<0|1> " 1722 "linear-buffer=<0|1> " 1723 "ext-buffer=<0|1>"); 1724
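/*
 * Usage sketch (illustrative values, not part of the driver): the PMD is
 * instantiated through vdev arguments matching the parameter string above,
 * e.g. on the application command line
 *
 *   --vdev 'net_vhost0,iface=/tmp/vhost-sock0,queues=2,client=1'
 *
 * or, equivalently, at runtime via the hotplug API:
 *
 *   rte_eal_hotplug_add("vdev", "net_vhost0",
 *                       "iface=/tmp/vhost-sock0,queues=2,client=1");
 *
 * The iface value is the vhost-user socket path; the socket name and queue
 * count here are examples only.
 */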