1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2016 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <string.h> 7 #include <stdio.h> 8 #include <errno.h> 9 #include <unistd.h> 10 11 #include <rte_ethdev_driver.h> 12 #include <rte_ethdev_pci.h> 13 #include <rte_memcpy.h> 14 #include <rte_string_fns.h> 15 #include <rte_memzone.h> 16 #include <rte_malloc.h> 17 #include <rte_branch_prediction.h> 18 #include <rte_pci.h> 19 #include <rte_bus_pci.h> 20 #include <rte_ether.h> 21 #include <rte_ip.h> 22 #include <rte_arp.h> 23 #include <rte_common.h> 24 #include <rte_errno.h> 25 #include <rte_cpuflags.h> 26 27 #include <rte_memory.h> 28 #include <rte_eal.h> 29 #include <rte_dev.h> 30 #include <rte_cycles.h> 31 #include <rte_kvargs.h> 32 33 #include "virtio_ethdev.h" 34 #include "virtio_pci.h" 35 #include "virtio_logs.h" 36 #include "virtqueue.h" 37 #include "virtio_rxtx.h" 38 39 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); 40 static int virtio_dev_configure(struct rte_eth_dev *dev); 41 static int virtio_dev_start(struct rte_eth_dev *dev); 42 static void virtio_dev_stop(struct rte_eth_dev *dev); 43 static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev); 44 static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev); 45 static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev); 46 static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev); 47 static void virtio_dev_info_get(struct rte_eth_dev *dev, 48 struct rte_eth_dev_info *dev_info); 49 static int virtio_dev_link_update(struct rte_eth_dev *dev, 50 int wait_to_complete); 51 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask); 52 53 static void virtio_set_hwaddr(struct virtio_hw *hw); 54 static void virtio_get_hwaddr(struct virtio_hw *hw); 55 56 static int virtio_dev_stats_get(struct rte_eth_dev *dev, 57 struct rte_eth_stats *stats); 58 static int virtio_dev_xstats_get(struct rte_eth_dev *dev, 59 struct rte_eth_xstat *xstats, unsigned n); 60 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, 61 struct rte_eth_xstat_name *xstats_names, 62 unsigned limit); 63 static void virtio_dev_stats_reset(struct rte_eth_dev *dev); 64 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); 65 static int virtio_vlan_filter_set(struct rte_eth_dev *dev, 66 uint16_t vlan_id, int on); 67 static int virtio_mac_addr_add(struct rte_eth_dev *dev, 68 struct ether_addr *mac_addr, 69 uint32_t index, uint32_t vmdq); 70 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); 71 static int virtio_mac_addr_set(struct rte_eth_dev *dev, 72 struct ether_addr *mac_addr); 73 74 static int virtio_intr_disable(struct rte_eth_dev *dev); 75 76 static int virtio_dev_queue_stats_mapping_set( 77 struct rte_eth_dev *eth_dev, 78 uint16_t queue_id, 79 uint8_t stat_idx, 80 uint8_t is_rx); 81 82 int virtio_logtype_init; 83 int virtio_logtype_driver; 84 85 static void virtio_notify_peers(struct rte_eth_dev *dev); 86 static void virtio_ack_link_announce(struct rte_eth_dev *dev); 87 88 /* 89 * The set of PCI devices this driver supports 90 */ 91 static const struct rte_pci_id pci_id_virtio_map[] = { 92 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) }, 93 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) }, 94 { .vendor_id = 0, /* sentinel */ }, 95 }; 96 97 struct rte_virtio_xstats_name_off { 98 char name[RTE_ETH_XSTATS_NAME_SIZE]; 99 unsigned offset; 100 }; 101 102 /* [rt]x_qX_ is prepended to the name string here 
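   (so, for example, the first counter of the first RX queue is reported as "rx_q0_good_packets")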
*/ 103 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { 104 {"good_packets", offsetof(struct virtnet_rx, stats.packets)}, 105 {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)}, 106 {"errors", offsetof(struct virtnet_rx, stats.errors)}, 107 {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)}, 108 {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)}, 109 {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])}, 110 {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])}, 111 {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])}, 112 {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, 113 {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, 114 {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, 115 {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, 116 {"size_1519_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, 117 }; 118 119 /* [rt]x_qX_ is prepended to the name string here */ 120 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { 121 {"good_packets", offsetof(struct virtnet_tx, stats.packets)}, 122 {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)}, 123 {"errors", offsetof(struct virtnet_tx, stats.errors)}, 124 {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)}, 125 {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)}, 126 {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])}, 127 {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])}, 128 {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])}, 129 {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, 130 {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, 131 {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, 132 {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, 133 {"size_1519_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, 134 }; 135 136 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ 137 sizeof(rte_virtio_rxq_stat_strings[0])) 138 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ 139 sizeof(rte_virtio_txq_stat_strings[0])) 140 141 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; 142 143 static int 144 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, 145 int *dlen, int pkt_num) 146 { 147 uint32_t head, i; 148 int k, sum = 0; 149 virtio_net_ctrl_ack status = ~0; 150 struct virtio_pmd_ctrl *result; 151 struct virtqueue *vq; 152 153 ctrl->status = status; 154 155 if (!cvq || !cvq->vq) { 156 PMD_INIT_LOG(ERR, "Control queue is not supported."); 157 return -1; 158 } 159 160 rte_spinlock_lock(&cvq->lock); 161 vq = cvq->vq; 162 head = vq->vq_desc_head_idx; 163 164 PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " 165 "vq->hw->cvq = %p vq = %p", 166 vq->vq_desc_head_idx, status, vq->hw->cvq, vq); 167 168 if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) { 169 rte_spinlock_unlock(&cvq->lock); 170 return -1; 171 } 172 173 memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, 174 sizeof(struct virtio_pmd_ctrl)); 175 176 /* 177 * Format is enforced in qemu code: 178 * One TX packet for header; 179 * At least one TX packet per argument; 180 * One RX packet for ACK. 
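 *
 * As an illustrative sketch (not a normative layout), with pkt_num == 1 the
 * descriptor chain assembled below looks like:
 *   desc[head]: ctrl header          (driver->device, VRING_DESC_F_NEXT)
 *   desc[i]:    dlen[0] command data (driver->device, VRING_DESC_F_NEXT)
 *   desc[last]: 1-byte ack/status    (device->driver, VRING_DESC_F_WRITE)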
181 */ 182 vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; 183 vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem; 184 vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); 185 vq->vq_free_cnt--; 186 i = vq->vq_ring.desc[head].next; 187 188 for (k = 0; k < pkt_num; k++) { 189 vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; 190 vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem 191 + sizeof(struct virtio_net_ctrl_hdr) 192 + sizeof(ctrl->status) + sizeof(uint8_t)*sum; 193 vq->vq_ring.desc[i].len = dlen[k]; 194 sum += dlen[k]; 195 vq->vq_free_cnt--; 196 i = vq->vq_ring.desc[i].next; 197 } 198 199 vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; 200 vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem 201 + sizeof(struct virtio_net_ctrl_hdr); 202 vq->vq_ring.desc[i].len = sizeof(ctrl->status); 203 vq->vq_free_cnt--; 204 205 vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; 206 207 vq_update_avail_ring(vq, head); 208 vq_update_avail_idx(vq); 209 210 PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); 211 212 virtqueue_notify(vq); 213 214 rte_rmb(); 215 while (VIRTQUEUE_NUSED(vq) == 0) { 216 rte_rmb(); 217 usleep(100); 218 } 219 220 while (VIRTQUEUE_NUSED(vq)) { 221 uint32_t idx, desc_idx, used_idx; 222 struct vring_used_elem *uep; 223 224 used_idx = (uint32_t)(vq->vq_used_cons_idx 225 & (vq->vq_nentries - 1)); 226 uep = &vq->vq_ring.used->ring[used_idx]; 227 idx = (uint32_t) uep->id; 228 desc_idx = idx; 229 230 while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { 231 desc_idx = vq->vq_ring.desc[desc_idx].next; 232 vq->vq_free_cnt++; 233 } 234 235 vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; 236 vq->vq_desc_head_idx = idx; 237 238 vq->vq_used_cons_idx++; 239 vq->vq_free_cnt++; 240 } 241 242 PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", 243 vq->vq_free_cnt, vq->vq_desc_head_idx); 244 245 result = cvq->virtio_net_hdr_mz->addr; 246 247 rte_spinlock_unlock(&cvq->lock); 248 return result->status; 249 } 250 251 static int 252 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) 253 { 254 struct virtio_hw *hw = dev->data->dev_private; 255 struct virtio_pmd_ctrl ctrl; 256 int dlen[1]; 257 int ret; 258 259 ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; 260 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 261 memcpy(ctrl.data, &nb_queues, sizeof(uint16_t)); 262 263 dlen[0] = sizeof(uint16_t); 264 265 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 266 if (ret) { 267 PMD_INIT_LOG(ERR, "Multiqueue configured but send command " 268 "failed, this is too late now..."); 269 return -EINVAL; 270 } 271 272 return 0; 273 } 274 275 static void 276 virtio_dev_queue_release(void *queue __rte_unused) 277 { 278 /* do nothing */ 279 } 280 281 static uint16_t 282 virtio_get_nr_vq(struct virtio_hw *hw) 283 { 284 uint16_t nr_vq = hw->max_queue_pairs * 2; 285 286 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) 287 nr_vq += 1; 288 289 return nr_vq; 290 } 291 292 static void 293 virtio_init_vring(struct virtqueue *vq) 294 { 295 int size = vq->vq_nentries; 296 struct vring *vr = &vq->vq_ring; 297 uint8_t *ring_mem = vq->vq_ring_virt_mem; 298 299 PMD_INIT_FUNC_TRACE(); 300 301 /* 302 * Reinitialise since virtio port might have been stopped and restarted 303 */ 304 memset(ring_mem, 0, vq->vq_ring_size); 305 vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); 306 vq->vq_used_cons_idx = 0; 307 vq->vq_desc_head_idx = 0; 308 vq->vq_avail_idx = 0; 309 vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); 310 vq->vq_free_cnt = vq->vq_nentries; 311 
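	/* Also clear the per-descriptor bookkeeping (mbuf cookies, chain lengths). */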
memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); 312 313 vring_desc_init(vr->desc, size); 314 315 /* 316 * Disable device(host) interrupting guest 317 */ 318 virtqueue_disable_intr(vq); 319 } 320 321 static int 322 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) 323 { 324 char vq_name[VIRTQUEUE_MAX_NAME_SZ]; 325 char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ]; 326 const struct rte_memzone *mz = NULL, *hdr_mz = NULL; 327 unsigned int vq_size, size; 328 struct virtio_hw *hw = dev->data->dev_private; 329 struct virtnet_rx *rxvq = NULL; 330 struct virtnet_tx *txvq = NULL; 331 struct virtnet_ctl *cvq = NULL; 332 struct virtqueue *vq; 333 size_t sz_hdr_mz = 0; 334 void *sw_ring = NULL; 335 int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx); 336 int ret; 337 338 PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); 339 340 /* 341 * Read the virtqueue size from the Queue Size field 342 * Always power of 2 and if 0 virtqueue does not exist 343 */ 344 vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx); 345 PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size); 346 if (vq_size == 0) { 347 PMD_INIT_LOG(ERR, "virtqueue does not exist"); 348 return -EINVAL; 349 } 350 351 if (!rte_is_power_of_2(vq_size)) { 352 PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2"); 353 return -EINVAL; 354 } 355 356 snprintf(vq_name, sizeof(vq_name), "port%d_vq%d", 357 dev->data->port_id, vtpci_queue_idx); 358 359 size = RTE_ALIGN_CEIL(sizeof(*vq) + 360 vq_size * sizeof(struct vq_desc_extra), 361 RTE_CACHE_LINE_SIZE); 362 if (queue_type == VTNET_TQ) { 363 /* 364 * For each xmit packet, allocate a virtio_net_hdr 365 * and indirect ring elements 366 */ 367 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region); 368 } else if (queue_type == VTNET_CQ) { 369 /* Allocate a page for control vq command, data and status */ 370 sz_hdr_mz = PAGE_SIZE; 371 } 372 373 vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE, 374 SOCKET_ID_ANY); 375 if (vq == NULL) { 376 PMD_INIT_LOG(ERR, "can not allocate vq"); 377 return -ENOMEM; 378 } 379 hw->vqs[vtpci_queue_idx] = vq; 380 381 vq->hw = hw; 382 vq->vq_queue_index = vtpci_queue_idx; 383 vq->vq_nentries = vq_size; 384 385 /* 386 * Reserve a memzone for vring elements 387 */ 388 size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); 389 vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); 390 PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", 391 size, vq->vq_ring_size); 392 393 mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, 394 SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, 395 VIRTIO_PCI_VRING_ALIGN); 396 if (mz == NULL) { 397 if (rte_errno == EEXIST) 398 mz = rte_memzone_lookup(vq_name); 399 if (mz == NULL) { 400 ret = -ENOMEM; 401 goto fail_q_alloc; 402 } 403 } 404 405 memset(mz->addr, 0, mz->len); 406 407 vq->vq_ring_mem = mz->iova; 408 vq->vq_ring_virt_mem = mz->addr; 409 PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, 410 (uint64_t)mz->iova); 411 PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64, 412 (uint64_t)(uintptr_t)mz->addr); 413 414 virtio_init_vring(vq); 415 416 if (sz_hdr_mz) { 417 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr", 418 dev->data->port_id, vtpci_queue_idx); 419 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz, 420 SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, 421 RTE_CACHE_LINE_SIZE); 422 if (hdr_mz == NULL) { 423 if (rte_errno == EEXIST) 424 hdr_mz = rte_memzone_lookup(vq_hdr_name); 425 if (hdr_mz == NULL) { 426 ret = -ENOMEM; 427 goto fail_q_alloc; 428 } 
429 } 430 } 431 432 if (queue_type == VTNET_RQ) { 433 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * 434 sizeof(vq->sw_ring[0]); 435 436 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, 437 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 438 if (!sw_ring) { 439 PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); 440 ret = -ENOMEM; 441 goto fail_q_alloc; 442 } 443 444 vq->sw_ring = sw_ring; 445 rxvq = &vq->rxq; 446 rxvq->vq = vq; 447 rxvq->port_id = dev->data->port_id; 448 rxvq->mz = mz; 449 } else if (queue_type == VTNET_TQ) { 450 txvq = &vq->txq; 451 txvq->vq = vq; 452 txvq->port_id = dev->data->port_id; 453 txvq->mz = mz; 454 txvq->virtio_net_hdr_mz = hdr_mz; 455 txvq->virtio_net_hdr_mem = hdr_mz->iova; 456 } else if (queue_type == VTNET_CQ) { 457 cvq = &vq->cq; 458 cvq->vq = vq; 459 cvq->mz = mz; 460 cvq->virtio_net_hdr_mz = hdr_mz; 461 cvq->virtio_net_hdr_mem = hdr_mz->iova; 462 memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); 463 464 hw->cvq = cvq; 465 } 466 467 /* For virtio_user case (that is when hw->dev is NULL), we use 468 * virtual address. And we need properly set _offset_, please see 469 * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 470 */ 471 if (!hw->virtio_user_dev) 472 vq->offset = offsetof(struct rte_mbuf, buf_iova); 473 else { 474 vq->vq_ring_mem = (uintptr_t)mz->addr; 475 vq->offset = offsetof(struct rte_mbuf, buf_addr); 476 if (queue_type == VTNET_TQ) 477 txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; 478 else if (queue_type == VTNET_CQ) 479 cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; 480 } 481 482 if (queue_type == VTNET_TQ) { 483 struct virtio_tx_region *txr; 484 unsigned int i; 485 486 txr = hdr_mz->addr; 487 memset(txr, 0, vq_size * sizeof(*txr)); 488 for (i = 0; i < vq_size; i++) { 489 struct vring_desc *start_dp = txr[i].tx_indir; 490 491 vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); 492 493 /* first indirect descriptor is always the tx header */ 494 start_dp->addr = txvq->virtio_net_hdr_mem 495 + i * sizeof(*txr) 496 + offsetof(struct virtio_tx_region, tx_hdr); 497 498 start_dp->len = hw->vtnet_hdr_size; 499 start_dp->flags = VRING_DESC_F_NEXT; 500 } 501 } 502 503 if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) { 504 PMD_INIT_LOG(ERR, "setup_queue failed"); 505 return -EINVAL; 506 } 507 508 return 0; 509 510 fail_q_alloc: 511 rte_free(sw_ring); 512 rte_memzone_free(hdr_mz); 513 rte_memzone_free(mz); 514 rte_free(vq); 515 516 return ret; 517 } 518 519 static void 520 virtio_free_queues(struct virtio_hw *hw) 521 { 522 uint16_t nr_vq = virtio_get_nr_vq(hw); 523 struct virtqueue *vq; 524 int queue_type; 525 uint16_t i; 526 527 if (hw->vqs == NULL) 528 return; 529 530 for (i = 0; i < nr_vq; i++) { 531 vq = hw->vqs[i]; 532 if (!vq) 533 continue; 534 535 queue_type = virtio_get_queue_type(hw, i); 536 if (queue_type == VTNET_RQ) { 537 rte_free(vq->sw_ring); 538 rte_memzone_free(vq->rxq.mz); 539 } else if (queue_type == VTNET_TQ) { 540 rte_memzone_free(vq->txq.mz); 541 rte_memzone_free(vq->txq.virtio_net_hdr_mz); 542 } else { 543 rte_memzone_free(vq->cq.mz); 544 rte_memzone_free(vq->cq.virtio_net_hdr_mz); 545 } 546 547 rte_free(vq); 548 hw->vqs[i] = NULL; 549 } 550 551 rte_free(hw->vqs); 552 hw->vqs = NULL; 553 } 554 555 static int 556 virtio_alloc_queues(struct rte_eth_dev *dev) 557 { 558 struct virtio_hw *hw = dev->data->dev_private; 559 uint16_t nr_vq = virtio_get_nr_vq(hw); 560 uint16_t i; 561 int ret; 562 563 hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0); 564 if (!hw->vqs) { 565 PMD_INIT_LOG(ERR, "failed to 
allocate vqs"); 566 return -ENOMEM; 567 } 568 569 for (i = 0; i < nr_vq; i++) { 570 ret = virtio_init_queue(dev, i); 571 if (ret < 0) { 572 virtio_free_queues(hw); 573 return ret; 574 } 575 } 576 577 return 0; 578 } 579 580 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev); 581 582 static void 583 virtio_dev_close(struct rte_eth_dev *dev) 584 { 585 struct virtio_hw *hw = dev->data->dev_private; 586 struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; 587 588 PMD_INIT_LOG(DEBUG, "virtio_dev_close"); 589 590 if (!hw->opened) 591 return; 592 hw->opened = false; 593 594 /* reset the NIC */ 595 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 596 VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); 597 if (intr_conf->rxq) 598 virtio_queues_unbind_intr(dev); 599 600 if (intr_conf->lsc || intr_conf->rxq) { 601 virtio_intr_disable(dev); 602 rte_intr_efd_disable(dev->intr_handle); 603 rte_free(dev->intr_handle->intr_vec); 604 dev->intr_handle->intr_vec = NULL; 605 } 606 607 vtpci_reset(hw); 608 virtio_dev_free_mbufs(dev); 609 virtio_free_queues(hw); 610 } 611 612 static void 613 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) 614 { 615 struct virtio_hw *hw = dev->data->dev_private; 616 struct virtio_pmd_ctrl ctrl; 617 int dlen[1]; 618 int ret; 619 620 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 621 PMD_INIT_LOG(INFO, "host does not support rx control"); 622 return; 623 } 624 625 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 626 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; 627 ctrl.data[0] = 1; 628 dlen[0] = 1; 629 630 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 631 if (ret) 632 PMD_INIT_LOG(ERR, "Failed to enable promisc"); 633 } 634 635 static void 636 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) 637 { 638 struct virtio_hw *hw = dev->data->dev_private; 639 struct virtio_pmd_ctrl ctrl; 640 int dlen[1]; 641 int ret; 642 643 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 644 PMD_INIT_LOG(INFO, "host does not support rx control"); 645 return; 646 } 647 648 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 649 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; 650 ctrl.data[0] = 0; 651 dlen[0] = 1; 652 653 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 654 if (ret) 655 PMD_INIT_LOG(ERR, "Failed to disable promisc"); 656 } 657 658 static void 659 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) 660 { 661 struct virtio_hw *hw = dev->data->dev_private; 662 struct virtio_pmd_ctrl ctrl; 663 int dlen[1]; 664 int ret; 665 666 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 667 PMD_INIT_LOG(INFO, "host does not support rx control"); 668 return; 669 } 670 671 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 672 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; 673 ctrl.data[0] = 1; 674 dlen[0] = 1; 675 676 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 677 if (ret) 678 PMD_INIT_LOG(ERR, "Failed to enable allmulticast"); 679 } 680 681 static void 682 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) 683 { 684 struct virtio_hw *hw = dev->data->dev_private; 685 struct virtio_pmd_ctrl ctrl; 686 int dlen[1]; 687 int ret; 688 689 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { 690 PMD_INIT_LOG(INFO, "host does not support rx control"); 691 return; 692 } 693 694 ctrl.hdr.class = VIRTIO_NET_CTRL_RX; 695 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; 696 ctrl.data[0] = 0; 697 dlen[0] = 1; 698 699 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); 700 if (ret) 701 PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); 702 } 703 704 #define VLAN_TAG_LEN 4 /* 802.3ac tag 
(not DMA'd) */
static int
virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct virtio_hw *hw = dev->data->dev_private;
	uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
				 hw->vtnet_hdr_size;
	uint32_t frame_size = mtu + ether_hdr_len;
	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;

	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);

	if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) {
		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
			ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
		return -EINVAL;
	}
	return 0;
}

static int
virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
	struct virtqueue *vq = rxvq->vq;

	virtqueue_enable_intr(vq);
	virtio_mb();
	return 0;
}

static int
virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
	struct virtqueue *vq = rxvq->vq;

	virtqueue_disable_intr(vq);
	return 0;
}

/*
 * dev_ops for virtio, bare necessities for basic operation
 */
static const struct eth_dev_ops virtio_eth_dev_ops = {
	.dev_configure = virtio_dev_configure,
	.dev_start = virtio_dev_start,
	.dev_stop = virtio_dev_stop,
	.dev_close = virtio_dev_close,
	.promiscuous_enable = virtio_dev_promiscuous_enable,
	.promiscuous_disable = virtio_dev_promiscuous_disable,
	.allmulticast_enable = virtio_dev_allmulticast_enable,
	.allmulticast_disable = virtio_dev_allmulticast_disable,
	.mtu_set = virtio_mtu_set,
	.dev_infos_get = virtio_dev_info_get,
	.stats_get = virtio_dev_stats_get,
	.xstats_get = virtio_dev_xstats_get,
	.xstats_get_names = virtio_dev_xstats_get_names,
	.stats_reset = virtio_dev_stats_reset,
	.xstats_reset = virtio_dev_stats_reset,
	.link_update = virtio_dev_link_update,
	.vlan_offload_set = virtio_dev_vlan_offload_set,
	.rx_queue_setup = virtio_dev_rx_queue_setup,
	.rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable,
	.rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable,
	.rx_queue_release = virtio_dev_queue_release,
	.rx_descriptor_done = virtio_dev_rx_queue_done,
	.tx_queue_setup = virtio_dev_tx_queue_setup,
	.tx_queue_release = virtio_dev_queue_release,
	/* collect stats per queue */
	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
	.vlan_filter_set = virtio_vlan_filter_set,
	.mac_addr_add = virtio_mac_addr_add,
	.mac_addr_remove = virtio_mac_addr_remove,
	.mac_addr_set = virtio_mac_addr_set,
};

/*
 * dev_ops for virtio-user in secondary processes, as we just have
 * some limited support currently.
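 * (Rx/Tx burst handlers for secondary processes are still installed
 * separately in eth_virtio_dev_init() via set_rxtx_funcs().)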
784 */ 785 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = { 786 .dev_infos_get = virtio_dev_info_get, 787 .stats_get = virtio_dev_stats_get, 788 .xstats_get = virtio_dev_xstats_get, 789 .xstats_get_names = virtio_dev_xstats_get_names, 790 .stats_reset = virtio_dev_stats_reset, 791 .xstats_reset = virtio_dev_stats_reset, 792 /* collect stats per queue */ 793 .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, 794 }; 795 796 static void 797 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 798 { 799 unsigned i; 800 801 for (i = 0; i < dev->data->nb_tx_queues; i++) { 802 const struct virtnet_tx *txvq = dev->data->tx_queues[i]; 803 if (txvq == NULL) 804 continue; 805 806 stats->opackets += txvq->stats.packets; 807 stats->obytes += txvq->stats.bytes; 808 stats->oerrors += txvq->stats.errors; 809 810 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 811 stats->q_opackets[i] = txvq->stats.packets; 812 stats->q_obytes[i] = txvq->stats.bytes; 813 } 814 } 815 816 for (i = 0; i < dev->data->nb_rx_queues; i++) { 817 const struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 818 if (rxvq == NULL) 819 continue; 820 821 stats->ipackets += rxvq->stats.packets; 822 stats->ibytes += rxvq->stats.bytes; 823 stats->ierrors += rxvq->stats.errors; 824 825 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 826 stats->q_ipackets[i] = rxvq->stats.packets; 827 stats->q_ibytes[i] = rxvq->stats.bytes; 828 } 829 } 830 831 stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; 832 } 833 834 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, 835 struct rte_eth_xstat_name *xstats_names, 836 __rte_unused unsigned limit) 837 { 838 unsigned i; 839 unsigned count = 0; 840 unsigned t; 841 842 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + 843 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; 844 845 if (xstats_names != NULL) { 846 /* Note: limit checked in rte_eth_xstats_names() */ 847 848 for (i = 0; i < dev->data->nb_rx_queues; i++) { 849 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 850 if (rxvq == NULL) 851 continue; 852 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { 853 snprintf(xstats_names[count].name, 854 sizeof(xstats_names[count].name), 855 "rx_q%u_%s", i, 856 rte_virtio_rxq_stat_strings[t].name); 857 count++; 858 } 859 } 860 861 for (i = 0; i < dev->data->nb_tx_queues; i++) { 862 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 863 if (txvq == NULL) 864 continue; 865 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 866 snprintf(xstats_names[count].name, 867 sizeof(xstats_names[count].name), 868 "tx_q%u_%s", i, 869 rte_virtio_txq_stat_strings[t].name); 870 count++; 871 } 872 } 873 return count; 874 } 875 return nstats; 876 } 877 878 static int 879 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 880 unsigned n) 881 { 882 unsigned i; 883 unsigned count = 0; 884 885 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + 886 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; 887 888 if (n < nstats) 889 return nstats; 890 891 for (i = 0; i < dev->data->nb_rx_queues; i++) { 892 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 893 894 if (rxvq == NULL) 895 continue; 896 897 unsigned t; 898 899 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { 900 xstats[count].value = *(uint64_t *)(((char *)rxvq) + 901 rte_virtio_rxq_stat_strings[t].offset); 902 xstats[count].id = count; 903 count++; 904 } 905 } 906 907 for (i = 0; i < dev->data->nb_tx_queues; i++) { 908 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 909 910 if (txvq == NULL) 911 
continue; 912 913 unsigned t; 914 915 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 916 xstats[count].value = *(uint64_t *)(((char *)txvq) + 917 rte_virtio_txq_stat_strings[t].offset); 918 xstats[count].id = count; 919 count++; 920 } 921 } 922 923 return count; 924 } 925 926 static int 927 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 928 { 929 virtio_update_stats(dev, stats); 930 931 return 0; 932 } 933 934 static void 935 virtio_dev_stats_reset(struct rte_eth_dev *dev) 936 { 937 unsigned int i; 938 939 for (i = 0; i < dev->data->nb_tx_queues; i++) { 940 struct virtnet_tx *txvq = dev->data->tx_queues[i]; 941 if (txvq == NULL) 942 continue; 943 944 txvq->stats.packets = 0; 945 txvq->stats.bytes = 0; 946 txvq->stats.errors = 0; 947 txvq->stats.multicast = 0; 948 txvq->stats.broadcast = 0; 949 memset(txvq->stats.size_bins, 0, 950 sizeof(txvq->stats.size_bins[0]) * 8); 951 } 952 953 for (i = 0; i < dev->data->nb_rx_queues; i++) { 954 struct virtnet_rx *rxvq = dev->data->rx_queues[i]; 955 if (rxvq == NULL) 956 continue; 957 958 rxvq->stats.packets = 0; 959 rxvq->stats.bytes = 0; 960 rxvq->stats.errors = 0; 961 rxvq->stats.multicast = 0; 962 rxvq->stats.broadcast = 0; 963 memset(rxvq->stats.size_bins, 0, 964 sizeof(rxvq->stats.size_bins[0]) * 8); 965 } 966 } 967 968 static void 969 virtio_set_hwaddr(struct virtio_hw *hw) 970 { 971 vtpci_write_dev_config(hw, 972 offsetof(struct virtio_net_config, mac), 973 &hw->mac_addr, ETHER_ADDR_LEN); 974 } 975 976 static void 977 virtio_get_hwaddr(struct virtio_hw *hw) 978 { 979 if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) { 980 vtpci_read_dev_config(hw, 981 offsetof(struct virtio_net_config, mac), 982 &hw->mac_addr, ETHER_ADDR_LEN); 983 } else { 984 eth_random_addr(&hw->mac_addr[0]); 985 virtio_set_hwaddr(hw); 986 } 987 } 988 989 static int 990 virtio_mac_table_set(struct virtio_hw *hw, 991 const struct virtio_net_ctrl_mac *uc, 992 const struct virtio_net_ctrl_mac *mc) 993 { 994 struct virtio_pmd_ctrl ctrl; 995 int err, len[2]; 996 997 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 998 PMD_DRV_LOG(INFO, "host does not support mac table"); 999 return -1; 1000 } 1001 1002 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; 1003 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 1004 1005 len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries); 1006 memcpy(ctrl.data, uc, len[0]); 1007 1008 len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries); 1009 memcpy(ctrl.data + len[0], mc, len[1]); 1010 1011 err = virtio_send_command(hw->cvq, &ctrl, len, 2); 1012 if (err != 0) 1013 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err); 1014 return err; 1015 } 1016 1017 static int 1018 virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, 1019 uint32_t index, uint32_t vmdq __rte_unused) 1020 { 1021 struct virtio_hw *hw = dev->data->dev_private; 1022 const struct ether_addr *addrs = dev->data->mac_addrs; 1023 unsigned int i; 1024 struct virtio_net_ctrl_mac *uc, *mc; 1025 1026 if (index >= VIRTIO_MAX_MAC_ADDRS) { 1027 PMD_DRV_LOG(ERR, "mac address index %u out of range", index); 1028 return -EINVAL; 1029 } 1030 1031 uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); 1032 uc->entries = 0; 1033 mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); 1034 mc->entries = 0; 1035 1036 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { 1037 const struct ether_addr *addr 1038 = (i == index) ? mac_addr : addrs + i; 1039 struct virtio_net_ctrl_mac *tbl 1040 = is_multicast_ether_addr(addr) ? 
mc : uc; 1041 1042 memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN); 1043 } 1044 1045 return virtio_mac_table_set(hw, uc, mc); 1046 } 1047 1048 static void 1049 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) 1050 { 1051 struct virtio_hw *hw = dev->data->dev_private; 1052 struct ether_addr *addrs = dev->data->mac_addrs; 1053 struct virtio_net_ctrl_mac *uc, *mc; 1054 unsigned int i; 1055 1056 if (index >= VIRTIO_MAX_MAC_ADDRS) { 1057 PMD_DRV_LOG(ERR, "mac address index %u out of range", index); 1058 return; 1059 } 1060 1061 uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); 1062 uc->entries = 0; 1063 mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); 1064 mc->entries = 0; 1065 1066 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { 1067 struct virtio_net_ctrl_mac *tbl; 1068 1069 if (i == index || is_zero_ether_addr(addrs + i)) 1070 continue; 1071 1072 tbl = is_multicast_ether_addr(addrs + i) ? mc : uc; 1073 memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN); 1074 } 1075 1076 virtio_mac_table_set(hw, uc, mc); 1077 } 1078 1079 static int 1080 virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) 1081 { 1082 struct virtio_hw *hw = dev->data->dev_private; 1083 1084 memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN); 1085 1086 /* Use atomic update if available */ 1087 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 1088 struct virtio_pmd_ctrl ctrl; 1089 int len = ETHER_ADDR_LEN; 1090 1091 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; 1092 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 1093 1094 memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN); 1095 return virtio_send_command(hw->cvq, &ctrl, &len, 1); 1096 } 1097 1098 if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) 1099 return -ENOTSUP; 1100 1101 virtio_set_hwaddr(hw); 1102 return 0; 1103 } 1104 1105 static int 1106 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) 1107 { 1108 struct virtio_hw *hw = dev->data->dev_private; 1109 struct virtio_pmd_ctrl ctrl; 1110 int len; 1111 1112 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) 1113 return -ENOTSUP; 1114 1115 ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN; 1116 ctrl.hdr.cmd = on ? 
VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 1117 memcpy(ctrl.data, &vlan_id, sizeof(vlan_id)); 1118 len = sizeof(vlan_id); 1119 1120 return virtio_send_command(hw->cvq, &ctrl, &len, 1); 1121 } 1122 1123 static int 1124 virtio_intr_enable(struct rte_eth_dev *dev) 1125 { 1126 struct virtio_hw *hw = dev->data->dev_private; 1127 1128 if (rte_intr_enable(dev->intr_handle) < 0) 1129 return -1; 1130 1131 if (!hw->virtio_user_dev) 1132 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); 1133 1134 return 0; 1135 } 1136 1137 static int 1138 virtio_intr_disable(struct rte_eth_dev *dev) 1139 { 1140 struct virtio_hw *hw = dev->data->dev_private; 1141 1142 if (rte_intr_disable(dev->intr_handle) < 0) 1143 return -1; 1144 1145 if (!hw->virtio_user_dev) 1146 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev)); 1147 1148 return 0; 1149 } 1150 1151 static int 1152 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) 1153 { 1154 uint64_t host_features; 1155 1156 /* Prepare guest_features: feature that driver wants to support */ 1157 PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, 1158 req_features); 1159 1160 /* Read device(host) feature bits */ 1161 host_features = VTPCI_OPS(hw)->get_features(hw); 1162 PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, 1163 host_features); 1164 1165 /* If supported, ensure MTU value is valid before acknowledging it. */ 1166 if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) { 1167 struct virtio_net_config config; 1168 1169 vtpci_read_dev_config(hw, 1170 offsetof(struct virtio_net_config, mtu), 1171 &config.mtu, sizeof(config.mtu)); 1172 1173 if (config.mtu < ETHER_MIN_MTU) 1174 req_features &= ~(1ULL << VIRTIO_NET_F_MTU); 1175 } 1176 1177 /* 1178 * Negotiate features: Subset of device feature bits are written back 1179 * guest feature bits. 1180 */ 1181 hw->guest_features = req_features; 1182 hw->guest_features = vtpci_negotiate_features(hw, host_features); 1183 PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64, 1184 hw->guest_features); 1185 1186 if (hw->modern) { 1187 if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) { 1188 PMD_INIT_LOG(ERR, 1189 "VIRTIO_F_VERSION_1 features is not enabled."); 1190 return -1; 1191 } 1192 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK); 1193 if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) { 1194 PMD_INIT_LOG(ERR, 1195 "failed to set FEATURES_OK status!"); 1196 return -1; 1197 } 1198 } 1199 1200 hw->req_guest_features = req_features; 1201 1202 return 0; 1203 } 1204 1205 int 1206 virtio_dev_pause(struct rte_eth_dev *dev) 1207 { 1208 struct virtio_hw *hw = dev->data->dev_private; 1209 1210 rte_spinlock_lock(&hw->state_lock); 1211 1212 if (hw->started == 0) { 1213 /* Device is just stopped. */ 1214 rte_spinlock_unlock(&hw->state_lock); 1215 return -1; 1216 } 1217 hw->started = 0; 1218 /* 1219 * Prevent the worker threads from touching queues to avoid contention, 1220 * 1 ms should be enough for the ongoing Tx function to finish. 1221 */ 1222 rte_delay_ms(1); 1223 return 0; 1224 } 1225 1226 /* 1227 * Recover hw state to let the worker threads continue. 1228 */ 1229 void 1230 virtio_dev_resume(struct rte_eth_dev *dev) 1231 { 1232 struct virtio_hw *hw = dev->data->dev_private; 1233 1234 hw->started = 1; 1235 rte_spinlock_unlock(&hw->state_lock); 1236 } 1237 1238 /* 1239 * Should be called only after device is paused. 
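 *
 * Minimal usage sketch (this is what virtio_notify_peers() below does,
 * with pkt being a struct rte_mbuf *):
 *   if (virtio_dev_pause(dev) == 0) {
 *       virtio_inject_pkts(dev, &pkt, 1);
 *       virtio_dev_resume(dev);
 *   }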
1240 */ 1241 int 1242 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts, 1243 int nb_pkts) 1244 { 1245 struct virtio_hw *hw = dev->data->dev_private; 1246 struct virtnet_tx *txvq = dev->data->tx_queues[0]; 1247 int ret; 1248 1249 hw->inject_pkts = tx_pkts; 1250 ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts); 1251 hw->inject_pkts = NULL; 1252 1253 return ret; 1254 } 1255 1256 static void 1257 virtio_notify_peers(struct rte_eth_dev *dev) 1258 { 1259 struct virtio_hw *hw = dev->data->dev_private; 1260 struct virtnet_rx *rxvq; 1261 struct rte_mbuf *rarp_mbuf; 1262 1263 if (!dev->data->rx_queues) 1264 return; 1265 1266 rxvq = dev->data->rx_queues[0]; 1267 if (!rxvq) 1268 return; 1269 1270 rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool, 1271 (struct ether_addr *)hw->mac_addr); 1272 if (rarp_mbuf == NULL) { 1273 PMD_DRV_LOG(ERR, "failed to make RARP packet."); 1274 return; 1275 } 1276 1277 /* If virtio port just stopped, no need to send RARP */ 1278 if (virtio_dev_pause(dev) < 0) { 1279 rte_pktmbuf_free(rarp_mbuf); 1280 return; 1281 } 1282 1283 virtio_inject_pkts(dev, &rarp_mbuf, 1); 1284 virtio_dev_resume(dev); 1285 } 1286 1287 static void 1288 virtio_ack_link_announce(struct rte_eth_dev *dev) 1289 { 1290 struct virtio_hw *hw = dev->data->dev_private; 1291 struct virtio_pmd_ctrl ctrl; 1292 1293 ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE; 1294 ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK; 1295 1296 virtio_send_command(hw->cvq, &ctrl, NULL, 0); 1297 } 1298 1299 /* 1300 * Process virtio config changed interrupt. Call the callback 1301 * if link state changed, generate gratuitous RARP packet if 1302 * the status indicates an ANNOUNCE. 1303 */ 1304 void 1305 virtio_interrupt_handler(void *param) 1306 { 1307 struct rte_eth_dev *dev = param; 1308 struct virtio_hw *hw = dev->data->dev_private; 1309 uint8_t isr; 1310 uint16_t status; 1311 1312 /* Read interrupt status which clears interrupt */ 1313 isr = vtpci_isr(hw); 1314 PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); 1315 1316 if (virtio_intr_enable(dev) < 0) 1317 PMD_DRV_LOG(ERR, "interrupt enable failed"); 1318 1319 if (isr & VIRTIO_PCI_ISR_CONFIG) { 1320 if (virtio_dev_link_update(dev, 0) == 0) 1321 _rte_eth_dev_callback_process(dev, 1322 RTE_ETH_EVENT_INTR_LSC, 1323 NULL); 1324 1325 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { 1326 vtpci_read_dev_config(hw, 1327 offsetof(struct virtio_net_config, status), 1328 &status, sizeof(status)); 1329 if (status & VIRTIO_NET_S_ANNOUNCE) { 1330 virtio_notify_peers(dev); 1331 if (hw->cvq) 1332 virtio_ack_link_announce(dev); 1333 } 1334 } 1335 } 1336 } 1337 1338 /* set rx and tx handlers according to what is supported */ 1339 static void 1340 set_rxtx_funcs(struct rte_eth_dev *eth_dev) 1341 { 1342 struct virtio_hw *hw = eth_dev->data->dev_private; 1343 1344 eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare; 1345 if (hw->use_simple_rx) { 1346 PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u", 1347 eth_dev->data->port_id); 1348 eth_dev->rx_pkt_burst = virtio_recv_pkts_vec; 1349 } else if (hw->use_inorder_rx) { 1350 PMD_INIT_LOG(INFO, 1351 "virtio: using inorder mergeable buffer Rx path on port %u", 1352 eth_dev->data->port_id); 1353 eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts_inorder; 1354 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { 1355 PMD_INIT_LOG(INFO, 1356 "virtio: using mergeable buffer Rx path on port %u", 1357 eth_dev->data->port_id); 1358 eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts; 1359 } else { 1360 PMD_INIT_LOG(INFO, "virtio: 
using standard Rx path on port %u", 1361 eth_dev->data->port_id); 1362 eth_dev->rx_pkt_burst = &virtio_recv_pkts; 1363 } 1364 1365 if (hw->use_inorder_tx) { 1366 PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u", 1367 eth_dev->data->port_id); 1368 eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder; 1369 } else { 1370 PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u", 1371 eth_dev->data->port_id); 1372 eth_dev->tx_pkt_burst = virtio_xmit_pkts; 1373 } 1374 } 1375 1376 /* Only support 1:1 queue/interrupt mapping so far. 1377 * TODO: support n:1 queue/interrupt mapping when there are limited number of 1378 * interrupt vectors (<N+1). 1379 */ 1380 static int 1381 virtio_queues_bind_intr(struct rte_eth_dev *dev) 1382 { 1383 uint32_t i; 1384 struct virtio_hw *hw = dev->data->dev_private; 1385 1386 PMD_INIT_LOG(INFO, "queue/interrupt binding"); 1387 for (i = 0; i < dev->data->nb_rx_queues; ++i) { 1388 dev->intr_handle->intr_vec[i] = i + 1; 1389 if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) == 1390 VIRTIO_MSI_NO_VECTOR) { 1391 PMD_DRV_LOG(ERR, "failed to set queue vector"); 1392 return -EBUSY; 1393 } 1394 } 1395 1396 return 0; 1397 } 1398 1399 static void 1400 virtio_queues_unbind_intr(struct rte_eth_dev *dev) 1401 { 1402 uint32_t i; 1403 struct virtio_hw *hw = dev->data->dev_private; 1404 1405 PMD_INIT_LOG(INFO, "queue/interrupt unbinding"); 1406 for (i = 0; i < dev->data->nb_rx_queues; ++i) 1407 VTPCI_OPS(hw)->set_queue_irq(hw, 1408 hw->vqs[i * VTNET_CQ], 1409 VIRTIO_MSI_NO_VECTOR); 1410 } 1411 1412 static int 1413 virtio_configure_intr(struct rte_eth_dev *dev) 1414 { 1415 struct virtio_hw *hw = dev->data->dev_private; 1416 1417 if (!rte_intr_cap_multiple(dev->intr_handle)) { 1418 PMD_INIT_LOG(ERR, "Multiple intr vector not supported"); 1419 return -ENOTSUP; 1420 } 1421 1422 if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) { 1423 PMD_INIT_LOG(ERR, "Fail to create eventfd"); 1424 return -1; 1425 } 1426 1427 if (!dev->intr_handle->intr_vec) { 1428 dev->intr_handle->intr_vec = 1429 rte_zmalloc("intr_vec", 1430 hw->max_queue_pairs * sizeof(int), 0); 1431 if (!dev->intr_handle->intr_vec) { 1432 PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors", 1433 hw->max_queue_pairs); 1434 return -ENOMEM; 1435 } 1436 } 1437 1438 /* Re-register callback to update max_intr */ 1439 rte_intr_callback_unregister(dev->intr_handle, 1440 virtio_interrupt_handler, 1441 dev); 1442 rte_intr_callback_register(dev->intr_handle, 1443 virtio_interrupt_handler, 1444 dev); 1445 1446 /* DO NOT try to remove this! This function will enable msix, or QEMU 1447 * will encounter SIGSEGV when DRIVER_OK is sent. 1448 * And for legacy devices, this should be done before queue/vec binding 1449 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR 1450 * (22) will be ignored. 
1451 */ 1452 if (virtio_intr_enable(dev) < 0) { 1453 PMD_DRV_LOG(ERR, "interrupt enable failed"); 1454 return -1; 1455 } 1456 1457 if (virtio_queues_bind_intr(dev) < 0) { 1458 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt"); 1459 return -1; 1460 } 1461 1462 return 0; 1463 } 1464 1465 /* reset device and renegotiate features if needed */ 1466 static int 1467 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) 1468 { 1469 struct virtio_hw *hw = eth_dev->data->dev_private; 1470 struct virtio_net_config *config; 1471 struct virtio_net_config local_config; 1472 struct rte_pci_device *pci_dev = NULL; 1473 int ret; 1474 1475 /* Reset the device although not necessary at startup */ 1476 vtpci_reset(hw); 1477 1478 if (hw->vqs) { 1479 virtio_dev_free_mbufs(eth_dev); 1480 virtio_free_queues(hw); 1481 } 1482 1483 /* Tell the host we've noticed this device. */ 1484 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); 1485 1486 /* Tell the host we've known how to drive the device. */ 1487 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); 1488 if (virtio_negotiate_features(hw, req_features) < 0) 1489 return -1; 1490 1491 if (!hw->virtio_user_dev) { 1492 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 1493 rte_eth_copy_pci_info(eth_dev, pci_dev); 1494 } 1495 1496 /* If host does not support both status and MSI-X then disable LSC */ 1497 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) && 1498 hw->use_msix != VIRTIO_MSIX_NONE) 1499 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; 1500 else 1501 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 1502 1503 /* Setting up rx_header size for the device */ 1504 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) || 1505 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) 1506 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1507 else 1508 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr); 1509 1510 /* Copy the permanent MAC address to: virtio_hw */ 1511 virtio_get_hwaddr(hw); 1512 ether_addr_copy((struct ether_addr *) hw->mac_addr, 1513 ð_dev->data->mac_addrs[0]); 1514 PMD_INIT_LOG(DEBUG, 1515 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", 1516 hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], 1517 hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); 1518 1519 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) { 1520 config = &local_config; 1521 1522 vtpci_read_dev_config(hw, 1523 offsetof(struct virtio_net_config, mac), 1524 &config->mac, sizeof(config->mac)); 1525 1526 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { 1527 vtpci_read_dev_config(hw, 1528 offsetof(struct virtio_net_config, status), 1529 &config->status, sizeof(config->status)); 1530 } else { 1531 PMD_INIT_LOG(DEBUG, 1532 "VIRTIO_NET_F_STATUS is not supported"); 1533 config->status = 0; 1534 } 1535 1536 if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) { 1537 vtpci_read_dev_config(hw, 1538 offsetof(struct virtio_net_config, max_virtqueue_pairs), 1539 &config->max_virtqueue_pairs, 1540 sizeof(config->max_virtqueue_pairs)); 1541 } else { 1542 PMD_INIT_LOG(DEBUG, 1543 "VIRTIO_NET_F_MQ is not supported"); 1544 config->max_virtqueue_pairs = 1; 1545 } 1546 1547 hw->max_queue_pairs = config->max_virtqueue_pairs; 1548 1549 if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) { 1550 vtpci_read_dev_config(hw, 1551 offsetof(struct virtio_net_config, mtu), 1552 &config->mtu, 1553 sizeof(config->mtu)); 1554 1555 /* 1556 * MTU value has already been checked at negotiation 1557 * time, but check again in case it has changed since 1558 * then, which should not happen. 
1559 */ 1560 if (config->mtu < ETHER_MIN_MTU) { 1561 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)", 1562 config->mtu); 1563 return -1; 1564 } 1565 1566 hw->max_mtu = config->mtu; 1567 /* Set initial MTU to maximum one supported by vhost */ 1568 eth_dev->data->mtu = config->mtu; 1569 1570 } else { 1571 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN - 1572 VLAN_TAG_LEN - hw->vtnet_hdr_size; 1573 } 1574 1575 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", 1576 config->max_virtqueue_pairs); 1577 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status); 1578 PMD_INIT_LOG(DEBUG, 1579 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", 1580 config->mac[0], config->mac[1], 1581 config->mac[2], config->mac[3], 1582 config->mac[4], config->mac[5]); 1583 } else { 1584 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1"); 1585 hw->max_queue_pairs = 1; 1586 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN - 1587 VLAN_TAG_LEN - hw->vtnet_hdr_size; 1588 } 1589 1590 ret = virtio_alloc_queues(eth_dev); 1591 if (ret < 0) 1592 return ret; 1593 1594 if (eth_dev->data->dev_conf.intr_conf.rxq) { 1595 if (virtio_configure_intr(eth_dev) < 0) { 1596 PMD_INIT_LOG(ERR, "failed to configure interrupt"); 1597 virtio_free_queues(hw); 1598 return -1; 1599 } 1600 } 1601 1602 vtpci_reinit_complete(hw); 1603 1604 if (pci_dev) 1605 PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", 1606 eth_dev->data->port_id, pci_dev->id.vendor_id, 1607 pci_dev->id.device_id); 1608 1609 return 0; 1610 } 1611 1612 /* 1613 * Remap the PCI device again (IO port map for legacy device and 1614 * memory map for modern device), so that the secondary process 1615 * could have the PCI initiated correctly. 1616 */ 1617 static int 1618 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) 1619 { 1620 if (hw->modern) { 1621 /* 1622 * We don't have to re-parse the PCI config space, since 1623 * rte_pci_map_device() makes sure the mapped address 1624 * in secondary process would equal to the one mapped in 1625 * the primary process: error will be returned if that 1626 * requirement is not met. 1627 * 1628 * That said, we could simply reuse all cap pointers 1629 * (such as dev_cfg, common_cfg, etc.) parsed from the 1630 * primary process, which is stored in shared memory. 1631 */ 1632 if (rte_pci_map_device(pci_dev)) { 1633 PMD_INIT_LOG(DEBUG, "failed to map pci device!"); 1634 return -1; 1635 } 1636 } else { 1637 if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) 1638 return -1; 1639 } 1640 1641 return 0; 1642 } 1643 1644 static void 1645 virtio_set_vtpci_ops(struct virtio_hw *hw) 1646 { 1647 #ifdef RTE_VIRTIO_USER 1648 if (hw->virtio_user_dev) 1649 VTPCI_OPS(hw) = &virtio_user_ops; 1650 else 1651 #endif 1652 if (hw->modern) 1653 VTPCI_OPS(hw) = &modern_ops; 1654 else 1655 VTPCI_OPS(hw) = &legacy_ops; 1656 } 1657 1658 /* 1659 * This function is based on probe() function in virtio_pci.c 1660 * It returns 0 on success. 
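 *
 * Rough flow (see the body below): secondary processes only remap PCI
 * resources, reset the vtpci ops and re-hook the Rx/Tx burst functions;
 * the primary process allocates MAC address storage, runs vtpci_init()
 * and then virtio_init_device() with the default guest feature set.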
1661 */ 1662 int 1663 eth_virtio_dev_init(struct rte_eth_dev *eth_dev) 1664 { 1665 struct virtio_hw *hw = eth_dev->data->dev_private; 1666 int ret; 1667 1668 if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) { 1669 PMD_INIT_LOG(ERR, 1670 "Not sufficient headroom required = %d, avail = %d", 1671 (int)sizeof(struct virtio_net_hdr_mrg_rxbuf), 1672 RTE_PKTMBUF_HEADROOM); 1673 1674 return -1; 1675 } 1676 1677 eth_dev->dev_ops = &virtio_eth_dev_ops; 1678 1679 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1680 if (!hw->virtio_user_dev) { 1681 ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw); 1682 if (ret) 1683 return ret; 1684 } 1685 1686 virtio_set_vtpci_ops(hw); 1687 set_rxtx_funcs(eth_dev); 1688 1689 return 0; 1690 } 1691 1692 /* Allocate memory for storing MAC addresses */ 1693 eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); 1694 if (eth_dev->data->mac_addrs == NULL) { 1695 PMD_INIT_LOG(ERR, 1696 "Failed to allocate %d bytes needed to store MAC addresses", 1697 VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); 1698 return -ENOMEM; 1699 } 1700 1701 hw->port_id = eth_dev->data->port_id; 1702 /* For virtio_user case the hw->virtio_user_dev is populated by 1703 * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called. 1704 */ 1705 if (!hw->virtio_user_dev) { 1706 ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw); 1707 if (ret) 1708 goto err_vtpci_init; 1709 } 1710 1711 /* reset device and negotiate default features */ 1712 ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); 1713 if (ret < 0) 1714 goto err_virtio_init; 1715 1716 return 0; 1717 1718 err_virtio_init: 1719 if (!hw->virtio_user_dev) { 1720 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev)); 1721 if (!hw->modern) 1722 rte_pci_ioport_unmap(VTPCI_IO(hw)); 1723 } 1724 err_vtpci_init: 1725 rte_free(eth_dev->data->mac_addrs); 1726 eth_dev->data->mac_addrs = NULL; 1727 return ret; 1728 } 1729 1730 static int 1731 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) 1732 { 1733 struct virtio_hw *hw = eth_dev->data->dev_private; 1734 1735 PMD_INIT_FUNC_TRACE(); 1736 1737 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 1738 return 0; 1739 1740 virtio_dev_stop(eth_dev); 1741 virtio_dev_close(eth_dev); 1742 1743 eth_dev->dev_ops = NULL; 1744 eth_dev->tx_pkt_burst = NULL; 1745 eth_dev->rx_pkt_burst = NULL; 1746 1747 if (eth_dev->device) { 1748 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev)); 1749 if (!hw->modern) 1750 rte_pci_ioport_unmap(VTPCI_IO(hw)); 1751 } 1752 1753 PMD_INIT_LOG(DEBUG, "dev_uninit completed"); 1754 1755 return 0; 1756 } 1757 1758 static int vdpa_check_handler(__rte_unused const char *key, 1759 const char *value, __rte_unused void *opaque) 1760 { 1761 if (strcmp(value, "1")) 1762 return -1; 1763 1764 return 0; 1765 } 1766 1767 static int 1768 vdpa_mode_selected(struct rte_devargs *devargs) 1769 { 1770 struct rte_kvargs *kvlist; 1771 const char *key = "vdpa"; 1772 int ret = 0; 1773 1774 if (devargs == NULL) 1775 return 0; 1776 1777 kvlist = rte_kvargs_parse(devargs->args, NULL); 1778 if (kvlist == NULL) 1779 return 0; 1780 1781 if (!rte_kvargs_count(kvlist, key)) 1782 goto exit; 1783 1784 /* vdpa mode selected when there's a key-value pair: vdpa=1 */ 1785 if (rte_kvargs_process(kvlist, key, 1786 vdpa_check_handler, NULL) < 0) { 1787 goto exit; 1788 } 1789 ret = 1; 1790 1791 exit: 1792 rte_kvargs_free(kvlist); 1793 return ret; 1794 } 1795 1796 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1797 struct 
rte_pci_device *pci_dev) 1798 { 1799 if (rte_eal_iopl_init() != 0) { 1800 PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); 1801 return 1; 1802 } 1803 1804 /* virtio pmd skips probe if device needs to work in vdpa mode */ 1805 if (vdpa_mode_selected(pci_dev->device.devargs)) 1806 return 1; 1807 1808 return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw), 1809 eth_virtio_dev_init); 1810 } 1811 1812 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev) 1813 { 1814 return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit); 1815 } 1816 1817 static struct rte_pci_driver rte_virtio_pmd = { 1818 .driver = { 1819 .name = "net_virtio", 1820 }, 1821 .id_table = pci_id_virtio_map, 1822 .drv_flags = 0, 1823 .probe = eth_virtio_pci_probe, 1824 .remove = eth_virtio_pci_remove, 1825 }; 1826 1827 RTE_INIT(rte_virtio_pmd_init) 1828 { 1829 rte_eal_iopl_init(); 1830 rte_pci_register(&rte_virtio_pmd); 1831 } 1832 1833 static bool 1834 rx_offload_enabled(struct virtio_hw *hw) 1835 { 1836 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || 1837 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || 1838 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); 1839 } 1840 1841 static bool 1842 tx_offload_enabled(struct virtio_hw *hw) 1843 { 1844 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || 1845 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || 1846 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); 1847 } 1848 1849 /* 1850 * Configure virtio device 1851 * It returns 0 on success. 1852 */ 1853 static int 1854 virtio_dev_configure(struct rte_eth_dev *dev) 1855 { 1856 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 1857 const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; 1858 struct virtio_hw *hw = dev->data->dev_private; 1859 uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN + 1860 hw->vtnet_hdr_size; 1861 uint64_t rx_offloads = rxmode->offloads; 1862 uint64_t tx_offloads = txmode->offloads; 1863 uint64_t req_features; 1864 int ret; 1865 1866 PMD_INIT_LOG(DEBUG, "configure"); 1867 req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES; 1868 1869 if (dev->data->dev_conf.intr_conf.rxq) { 1870 ret = virtio_init_device(dev, hw->req_guest_features); 1871 if (ret < 0) 1872 return ret; 1873 } 1874 1875 if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len) 1876 req_features &= ~(1ULL << VIRTIO_NET_F_MTU); 1877 1878 if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM | 1879 DEV_RX_OFFLOAD_TCP_CKSUM)) 1880 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM); 1881 1882 if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) 1883 req_features |= 1884 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 1885 (1ULL << VIRTIO_NET_F_GUEST_TSO6); 1886 1887 if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM | 1888 DEV_TX_OFFLOAD_TCP_CKSUM)) 1889 req_features |= (1ULL << VIRTIO_NET_F_CSUM); 1890 1891 if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) 1892 req_features |= 1893 (1ULL << VIRTIO_NET_F_HOST_TSO4) | 1894 (1ULL << VIRTIO_NET_F_HOST_TSO6); 1895 1896 /* if request features changed, reinit the device */ 1897 if (req_features != hw->req_guest_features) { 1898 ret = virtio_init_device(dev, req_features); 1899 if (ret < 0) 1900 return ret; 1901 } 1902 1903 if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM | 1904 DEV_RX_OFFLOAD_TCP_CKSUM)) && 1905 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) { 1906 PMD_DRV_LOG(ERR, 1907 "rx checksum not available on this host"); 1908 return -ENOTSUP; 1909 } 1910 1911 if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) && 1912 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) 
	    || !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
		PMD_DRV_LOG(ERR,
			"Large Receive Offload not available on this host");
		return -ENOTSUP;
	}

	/* start control queue */
	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
		virtio_dev_cq_start(dev);

	if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
		hw->vlan_strip = 1;

	if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
	    && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
		PMD_DRV_LOG(ERR,
			"vlan filtering not available on this host");
		return -ENOTSUP;
	}

	hw->has_tx_offload = tx_offload_enabled(hw);
	hw->has_rx_offload = rx_offload_enabled(hw);

	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		/* Enable vector (0) for Link State Interrupt */
		if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
				VIRTIO_MSI_NO_VECTOR) {
			PMD_DRV_LOG(ERR, "failed to set config vector");
			return -EBUSY;
		}

	rte_spinlock_init(&hw->state_lock);

	hw->use_simple_rx = 1;

	if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
		hw->use_inorder_tx = 1;
		if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
			hw->use_inorder_rx = 1;
			hw->use_simple_rx = 0;
		} else {
			hw->use_inorder_rx = 0;
		}
	}

#if defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
		hw->use_simple_rx = 0;
	}
#endif
	if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		hw->use_simple_rx = 0;
	}

	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
			   DEV_RX_OFFLOAD_TCP_CKSUM |
			   DEV_RX_OFFLOAD_TCP_LRO |
			   DEV_RX_OFFLOAD_VLAN_STRIP))
		hw->use_simple_rx = 0;

	hw->opened = true;

	return 0;
}


static int
virtio_dev_start(struct rte_eth_dev *dev)
{
	uint16_t nb_queues, i;
	struct virtnet_rx *rxvq;
	struct virtnet_tx *txvq __rte_unused;
	struct virtio_hw *hw = dev->data->dev_private;
	int ret;

	/* Finish the initialization of the queues */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		ret = virtio_dev_rx_queue_setup_finish(dev, i);
		if (ret < 0)
			return ret;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		ret = virtio_dev_tx_queue_setup_finish(dev, i);
		if (ret < 0)
			return ret;
	}

	/* check if lsc interrupt feature is enabled */
	if (dev->data->dev_conf.intr_conf.lsc) {
		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
			PMD_DRV_LOG(ERR, "link status not supported by host");
			return -ENOTSUP;
		}
	}

	/* Enable uio/vfio intr/eventfd mapping: although we already did that
	 * in device configure, it could be unmapped while the device is
	 * stopped.
	 */
	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rxq) {
		virtio_intr_disable(dev);

		/* Setup interrupt callback */
		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
			rte_intr_callback_register(dev->intr_handle,
						   virtio_interrupt_handler,
						   dev);

		if (virtio_intr_enable(dev) < 0) {
			PMD_DRV_LOG(ERR, "interrupt enable failed");
			return -EIO;
		}
	}

	/* Notify the backend.
	 * Otherwise the tap backend might already stop its queue due to fullness.
static int
virtio_dev_start(struct rte_eth_dev *dev)
{
	uint16_t nb_queues, i;
	struct virtnet_rx *rxvq;
	struct virtnet_tx *txvq __rte_unused;
	struct virtio_hw *hw = dev->data->dev_private;
	int ret;

	/* Finish the initialization of the queues */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		ret = virtio_dev_rx_queue_setup_finish(dev, i);
		if (ret < 0)
			return ret;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		ret = virtio_dev_tx_queue_setup_finish(dev, i);
		if (ret < 0)
			return ret;
	}

	/* check if lsc interrupt feature is enabled */
	if (dev->data->dev_conf.intr_conf.lsc) {
		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
			PMD_DRV_LOG(ERR, "link status not supported by host");
			return -ENOTSUP;
		}
	}

	/* Enable uio/vfio intr/eventfd mapping: although we already did that
	 * in device configure, it could have been unmapped while the device
	 * was stopped.
	 */
	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rxq) {
		virtio_intr_disable(dev);

		/* Setup interrupt callback */
		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
			rte_intr_callback_register(dev->intr_handle,
						   virtio_interrupt_handler,
						   dev);

		if (virtio_intr_enable(dev) < 0) {
			PMD_DRV_LOG(ERR, "interrupt enable failed");
			return -EIO;
		}
	}

	/* Notify the backend.
	 * Otherwise the tap backend might already have stopped its queue due
	 * to fullness, and the vhost backend would have no chance to be
	 * woken up.
	 */
	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
	if (hw->max_queue_pairs > 1) {
		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
			return -EINVAL;
	}

	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxvq = dev->data->rx_queues[i];
		/* Flush the old packets */
		virtqueue_rxvq_flush(rxvq->vq);
		virtqueue_notify(rxvq->vq);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txvq = dev->data->tx_queues[i];
		virtqueue_notify(txvq->vq);
	}

	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxvq = dev->data->rx_queues[i];
		VIRTQUEUE_DUMP(rxvq->vq);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txvq = dev->data->tx_queues[i];
		VIRTQUEUE_DUMP(txvq->vq);
	}

	set_rxtx_funcs(dev);
	hw->started = true;

	/* Initialize Link state */
	virtio_dev_link_update(dev, 0);

	return 0;
}

static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	uint16_t nr_vq = virtio_get_nr_vq(hw);
	const char *type __rte_unused;
	unsigned int i, mbuf_num = 0;
	struct virtqueue *vq;
	struct rte_mbuf *buf;
	int queue_type;

	if (hw->vqs == NULL)
		return;

	for (i = 0; i < nr_vq; i++) {
		vq = hw->vqs[i];
		if (!vq)
			continue;

		queue_type = virtio_get_queue_type(hw, i);
		if (queue_type == VTNET_RQ)
			type = "rxq";
		else if (queue_type == VTNET_TQ)
			type = "txq";
		else
			continue;

		PMD_INIT_LOG(DEBUG,
			"Before freeing %s[%d] used and unused buf",
			type, i);
		VIRTQUEUE_DUMP(vq);

		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
			rte_pktmbuf_free(buf);
			mbuf_num++;
		}

		PMD_INIT_LOG(DEBUG,
			"After freeing %s[%d] used and unused buf",
			type, i);
		VIRTQUEUE_DUMP(vq);
	}

	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
}

/*
 * Stop device: disable interrupt and mark link down
 */
static void
virtio_dev_stop(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct rte_eth_link link;
	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;

	PMD_INIT_LOG(DEBUG, "stop");

	rte_spinlock_lock(&hw->state_lock);
	if (!hw->started)
		goto out_unlock;
	hw->started = false;

	if (intr_conf->lsc || intr_conf->rxq) {
		virtio_intr_disable(dev);

		/* Reset interrupt callback */
		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
			rte_intr_callback_unregister(dev->intr_handle,
						     virtio_interrupt_handler,
						     dev);
		}
	}

	memset(&link, 0, sizeof(link));
	rte_eth_linkstatus_set(dev, &link);
out_unlock:
	rte_spinlock_unlock(&hw->state_lock);
}
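
/*
 * Illustrative sketch (not part of the PMD): how an application typically
 * consumes the link state filled in by virtio_dev_link_update() below.
 * port_id is a placeholder.
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(port_id, &link);
 *	if (link.link_status == ETH_LINK_UP)
 *		printf("port %u up, %u Mbps\n", port_id, link.link_speed);
 */
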
static int
virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
{
	struct rte_eth_link link;
	uint16_t status;
	struct virtio_hw *hw = dev->data->dev_private;

	memset(&link, 0, sizeof(link));
	link.link_duplex = ETH_LINK_FULL_DUPLEX;
	link.link_speed = ETH_SPEED_NUM_10G;
	link.link_autoneg = ETH_LINK_FIXED;

	if (!hw->started) {
		link.link_status = ETH_LINK_DOWN;
	} else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
		PMD_INIT_LOG(DEBUG, "Get link status from hw");
		vtpci_read_dev_config(hw,
				offsetof(struct virtio_net_config, status),
				&status, sizeof(status));
		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
			link.link_status = ETH_LINK_DOWN;
			PMD_INIT_LOG(DEBUG, "Port %d is down",
				     dev->data->port_id);
		} else {
			link.link_status = ETH_LINK_UP;
			PMD_INIT_LOG(DEBUG, "Port %d is up",
				     dev->data->port_id);
		}
	} else {
		link.link_status = ETH_LINK_UP;
	}

	return rte_eth_linkstatus_set(dev, &link);
}

static int
virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
	struct virtio_hw *hw = dev->data->dev_private;
	uint64_t offloads = rxmode->offloads;

	if (mask & ETH_VLAN_FILTER_MASK) {
		if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
		    !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {

			PMD_DRV_LOG(NOTICE,
				"vlan filtering not available on this host");

			return -ENOTSUP;
		}
	}

	if (mask & ETH_VLAN_STRIP_MASK)
		hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);

	return 0;
}

static void
virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	uint64_t tso_mask, host_features;
	struct virtio_hw *hw = dev->data->dev_private;

	dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */

	dev_info->max_rx_queues =
		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
	dev_info->max_tx_queues =
		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;

	host_features = VTPCI_OPS(hw)->get_features(hw);
	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
	dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
		dev_info->rx_offload_capa |=
			DEV_RX_OFFLOAD_TCP_CKSUM |
			DEV_RX_OFFLOAD_UDP_CKSUM;
	}
	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
	if ((host_features & tso_mask) == tso_mask)
		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;

	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
				    DEV_TX_OFFLOAD_VLAN_INSERT;
	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
		dev_info->tx_offload_capa |=
			DEV_TX_OFFLOAD_UDP_CKSUM |
			DEV_TX_OFFLOAD_TCP_CKSUM;
	}
	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
		(1ULL << VIRTIO_NET_F_HOST_TSO6);
	if ((host_features & tso_mask) == tso_mask)
		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
}
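
/*
 * Illustrative sketch (not part of the PMD): checking the capabilities
 * reported by virtio_dev_info_get() above before requesting an offload.
 * port_id and conf are placeholders for the application's own variables.
 *
 *	struct rte_eth_dev_info dev_info;
 *
 *	rte_eth_dev_info_get(port_id, &dev_info);
 *	if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
 *		conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 */
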
/*
 * This stub allows testpmd to collect per-queue stats.
 */
static int
virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
		__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
		__rte_unused uint8_t is_rx)
{
	return 0;
}

RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");

RTE_INIT(virtio_init_log)
{
	virtio_logtype_init = rte_log_register("pmd.net.virtio.init");
	if (virtio_logtype_init >= 0)
		rte_log_set_level(virtio_logtype_init, RTE_LOG_NOTICE);
	virtio_logtype_driver = rte_log_register("pmd.net.virtio.driver");
	if (virtio_logtype_driver >= 0)
		rte_log_set_level(virtio_logtype_driver, RTE_LOG_NOTICE);
}
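
/*
 * Illustrative sketch (not part of the PMD): the two log types registered in
 * virtio_init_log() above default to NOTICE; an application (or a debugger)
 * can raise them at run time, e.g.:
 *
 *	rte_log_set_level(virtio_logtype_driver, RTE_LOG_DEBUG);
 *
 * The same effect is usually achieved from the command line via the EAL
 * --log-level option naming "pmd.net.virtio.driver".
 */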