1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016 IGEL Co., Ltd.
3 * Copyright(c) 2016-2018 Intel Corporation
4 */
5 #include <unistd.h>
6 #include <pthread.h>
7 #include <stdbool.h>
8 #include <sys/epoll.h>
9
10 #include <rte_mbuf.h>
11 #include <ethdev_driver.h>
12 #include <ethdev_vdev.h>
13 #include <rte_malloc.h>
14 #include <rte_memcpy.h>
15 #include <rte_bus_vdev.h>
16 #include <rte_kvargs.h>
17 #include <rte_vhost.h>
18 #include <rte_spinlock.h>
19
20 #include "rte_eth_vhost.h"
21
22 RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE);
23
24 #define VHOST_LOG(level, ...) \
25 rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
26
27 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
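/*
 * Virtio queue numbering convention used throughout this driver: guest queue
 * pair i maps to vring 2*i (guest RX, written to by eth_vhost_tx()) and
 * vring 2*i + 1 (guest TX, read from by eth_vhost_rx()).  This is why the
 * queue setup and interrupt helpers below compute virtqueue ids as
 * qid * VIRTIO_QNUM + VIRTIO_TXQ and (qid << 1) + 1 respectively.
 */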
28
29 #define ETH_VHOST_IFACE_ARG "iface"
30 #define ETH_VHOST_QUEUES_ARG "queues"
31 #define ETH_VHOST_CLIENT_ARG "client"
32 #define ETH_VHOST_IOMMU_SUPPORT "iommu-support"
33 #define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support"
34 #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso"
35 #define ETH_VHOST_LINEAR_BUF "linear-buffer"
36 #define ETH_VHOST_EXT_BUF "ext-buffer"
37 #define VHOST_MAX_PKT_BURST 32
38
39 static const char *valid_arguments[] = {
40 ETH_VHOST_IFACE_ARG,
41 ETH_VHOST_QUEUES_ARG,
42 ETH_VHOST_CLIENT_ARG,
43 ETH_VHOST_IOMMU_SUPPORT,
44 ETH_VHOST_POSTCOPY_SUPPORT,
45 ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
46 ETH_VHOST_LINEAR_BUF,
47 ETH_VHOST_EXT_BUF,
48 NULL
49 };
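/*
 * These keys are consumed by rte_pmd_vhost_probe() below; they correspond
 * one-to-one to the devargs accepted in the --vdev string (see the
 * RTE_PMD_REGISTER_PARAM_STRING() entry at the end of this file).
 */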
50
51 static struct rte_ether_addr base_eth_addr = {
52 .addr_bytes = {
53 0x56 /* V */,
54 0x48 /* H */,
55 0x4F /* O */,
56 0x53 /* S */,
57 0x54 /* T */,
58 0x00
59 }
60 };
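/*
 * Each port gets a locally unique MAC by taking this "VHOST\0" base address
 * and overwriting the last octet with the port id in eth_dev_vhost_create(),
 * i.e. 56:48:4F:53:54:<port_id>.
 */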
61
62 enum vhost_xstats_pkts {
63 VHOST_UNDERSIZE_PKT = 0,
64 VHOST_64_PKT,
65 VHOST_65_TO_127_PKT,
66 VHOST_128_TO_255_PKT,
67 VHOST_256_TO_511_PKT,
68 VHOST_512_TO_1023_PKT,
69 VHOST_1024_TO_1522_PKT,
70 VHOST_1523_TO_MAX_PKT,
71 VHOST_BROADCAST_PKT,
72 VHOST_MULTICAST_PKT,
73 VHOST_UNICAST_PKT,
74 VHOST_PKT,
75 VHOST_BYTE,
76 VHOST_MISSED_PKT,
77 VHOST_ERRORS_PKT,
78 VHOST_ERRORS_FRAGMENTED,
79 VHOST_ERRORS_JABBER,
80 VHOST_UNKNOWN_PROTOCOL,
81 VHOST_XSTATS_MAX,
82 };
83
84 struct vhost_stats {
85 uint64_t pkts;
86 uint64_t bytes;
87 uint64_t missed_pkts;
88 uint64_t xstats[VHOST_XSTATS_MAX];
89 };
90
91 struct vhost_queue {
92 int vid;
93 rte_atomic32_t allow_queuing;
94 rte_atomic32_t while_queuing;
95 struct pmd_internal *internal;
96 struct rte_mempool *mb_pool;
97 uint16_t port;
98 uint16_t virtqueue_id;
99 struct vhost_stats stats;
100 int intr_enable;
101 rte_spinlock_t intr_lock;
102 };
103
104 struct pmd_internal {
105 rte_atomic32_t dev_attached;
106 char *iface_name;
107 uint64_t flags;
108 uint64_t disable_flags;
109 uint16_t max_queues;
110 int vid;
111 rte_atomic32_t started;
112 uint8_t vlan_strip;
113 };
114
115 struct internal_list {
116 TAILQ_ENTRY(internal_list) next;
117 struct rte_eth_dev *eth_dev;
118 };
119
120 TAILQ_HEAD(internal_list_head, internal_list);
121 static struct internal_list_head internal_list =
122 TAILQ_HEAD_INITIALIZER(internal_list);
123
124 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
125
126 static struct rte_eth_link pmd_link = {
127 .link_speed = 10000,
128 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
129 .link_status = RTE_ETH_LINK_DOWN
130 };
131
132 struct rte_vhost_vring_state {
133 rte_spinlock_t lock;
134
135 bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
136 bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
137 unsigned int index;
138 unsigned int max_vring;
139 };
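/*
 * Per-port vring state used by the queue event API: 'cur' tracks the state
 * last reported by the vhost library via vring_state_changed(), while 'seen'
 * remembers what rte_eth_vhost_get_queue_event() has already returned to the
 * application, so each enable/disable transition is delivered exactly once.
 */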
140
141 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
142
143 #define VHOST_XSTATS_NAME_SIZE 64
144
145 struct vhost_xstats_name_off {
146 char name[VHOST_XSTATS_NAME_SIZE];
147 uint64_t offset;
148 };
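/*
 * Each entry below pairs an xstat name with the byte offset of its counter
 * inside struct vhost_queue; vhost_dev_xstats_get() adds that offset to the
 * queue pointer and dereferences it as a uint64_t, summing over all queues.
 */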
149
150 /* [rx]_ is prepended to the name string here */
151 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
152 {"good_packets",
153 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
154 {"total_bytes",
155 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
156 {"missed_pkts",
157 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
158 {"broadcast_packets",
159 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
160 {"multicast_packets",
161 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
162 {"unicast_packets",
163 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
164 {"undersize_packets",
165 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
166 {"size_64_packets",
167 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
168 {"size_65_to_127_packets",
169 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
170 {"size_128_to_255_packets",
171 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
172 {"size_256_to_511_packets",
173 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
174 {"size_512_to_1023_packets",
175 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
176 {"size_1024_to_1522_packets",
177 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
178 {"size_1523_to_max_packets",
179 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
180 {"errors_with_bad_CRC",
181 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
182 {"fragmented_errors",
183 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
184 {"jabber_errors",
185 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
186 {"unknown_protos_packets",
187 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
188 };
189
190 /* [tx]_ is prepended to the name string here */
191 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
192 {"good_packets",
193 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
194 {"total_bytes",
195 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
196 {"missed_pkts",
197 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
198 {"broadcast_packets",
199 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
200 {"multicast_packets",
201 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
202 {"unicast_packets",
203 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
204 {"undersize_packets",
205 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
206 {"size_64_packets",
207 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
208 {"size_65_to_127_packets",
209 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
210 {"size_128_to_255_packets",
211 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
212 {"size_256_to_511_packets",
213 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
214 {"size_512_to_1023_packets",
215 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
216 {"size_1024_to_1522_packets",
217 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
218 {"size_1523_to_max_packets",
219 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
220 {"errors_with_bad_CRC",
221 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
222 };
223
224 #define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
225 sizeof(vhost_rxport_stat_strings[0]))
226
227 #define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
228 sizeof(vhost_txport_stat_strings[0]))
229
230 static int
231 vhost_dev_xstats_reset(struct rte_eth_dev *dev)
232 {
233 struct vhost_queue *vq = NULL;
234 unsigned int i = 0;
235
236 for (i = 0; i < dev->data->nb_rx_queues; i++) {
237 vq = dev->data->rx_queues[i];
238 if (!vq)
239 continue;
240 memset(&vq->stats, 0, sizeof(vq->stats));
241 }
242 for (i = 0; i < dev->data->nb_tx_queues; i++) {
243 vq = dev->data->tx_queues[i];
244 if (!vq)
245 continue;
246 memset(&vq->stats, 0, sizeof(vq->stats));
247 }
248
249 return 0;
250 }
251
252 static int
253 vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
254 struct rte_eth_xstat_name *xstats_names,
255 unsigned int limit __rte_unused)
256 {
257 unsigned int t = 0;
258 int count = 0;
259 int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
260
261 if (!xstats_names)
262 return nstats;
263 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
264 snprintf(xstats_names[count].name,
265 sizeof(xstats_names[count].name),
266 "rx_%s", vhost_rxport_stat_strings[t].name);
267 count++;
268 }
269 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
270 snprintf(xstats_names[count].name,
271 sizeof(xstats_names[count].name),
272 "tx_%s", vhost_txport_stat_strings[t].name);
273 count++;
274 }
275 return count;
276 }
277
278 static int
279 vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
280 unsigned int n)
281 {
282 unsigned int i;
283 unsigned int t;
284 unsigned int count = 0;
285 struct vhost_queue *vq = NULL;
286 unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
287
288 if (n < nxstats)
289 return nxstats;
290
291 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
292 xstats[count].value = 0;
293 for (i = 0; i < dev->data->nb_rx_queues; i++) {
294 vq = dev->data->rx_queues[i];
295 if (!vq)
296 continue;
297 xstats[count].value +=
298 *(uint64_t *)(((char *)vq)
299 + vhost_rxport_stat_strings[t].offset);
300 }
301 xstats[count].id = count;
302 count++;
303 }
304 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
305 xstats[count].value = 0;
306 for (i = 0; i < dev->data->nb_tx_queues; i++) {
307 vq = dev->data->tx_queues[i];
308 if (!vq)
309 continue;
310 xstats[count].value +=
311 *(uint64_t *)(((char *)vq)
312 + vhost_txport_stat_strings[t].offset);
313 }
314 xstats[count].id = count;
315 count++;
316 }
317 return count;
318 }
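/*
 * A minimal retrieval sketch from the application side (names, sizes and the
 * port_id variable are illustrative; only the standard ethdev xstats API is
 * assumed):
 *
 *	struct rte_eth_xstat_name names[64];
 *	struct rte_eth_xstat values[64];
 *	int n = rte_eth_xstats_get_names(port_id, names, RTE_DIM(names));
 *	if (n > 0 && n <= (int)RTE_DIM(values))
 *		rte_eth_xstats_get(port_id, values, n);
 *	// values[i].id indexes names[], e.g. "rx_good_packets", "tx_total_bytes"
 */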
319
320 static inline void
321 vhost_count_xcast_packets(struct vhost_queue *vq,
322 struct rte_mbuf *mbuf)
323 {
324 struct rte_ether_addr *ea = NULL;
325 struct vhost_stats *pstats = &vq->stats;
326
327 ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
328 if (rte_is_multicast_ether_addr(ea)) {
329 if (rte_is_broadcast_ether_addr(ea))
330 pstats->xstats[VHOST_BROADCAST_PKT]++;
331 else
332 pstats->xstats[VHOST_MULTICAST_PKT]++;
333 } else {
334 pstats->xstats[VHOST_UNICAST_PKT]++;
335 }
336 }
337
338 static __rte_always_inline void
339 vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
340 {
341 uint32_t pkt_len = 0;
342 uint64_t index;
343 struct vhost_stats *pstats = &vq->stats;
344
345 pstats->xstats[VHOST_PKT]++;
346 pkt_len = buf->pkt_len;
347 if (pkt_len == 64) {
348 pstats->xstats[VHOST_64_PKT]++;
349 } else if (pkt_len > 64 && pkt_len < 1024) {
350 index = (sizeof(pkt_len) * 8)
351 - __builtin_clz(pkt_len) - 5;
352 pstats->xstats[index]++;
353 } else {
354 if (pkt_len < 64)
355 pstats->xstats[VHOST_UNDERSIZE_PKT]++;
356 else if (pkt_len <= 1522)
357 pstats->xstats[VHOST_1024_TO_1522_PKT]++;
358 else if (pkt_len > 1522)
359 pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
360 }
361 vhost_count_xcast_packets(vq, buf);
362 }
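/*
 * The index arithmetic above exploits the power-of-two bucket boundaries:
 * for 64 < pkt_len < 1024, (32 - __builtin_clz(pkt_len)) is the bit width of
 * pkt_len, so subtracting 5 maps 65..127 -> VHOST_65_TO_127_PKT (2),
 * 128..255 -> VHOST_128_TO_255_PKT (3), ..., 512..1023 -> VHOST_512_TO_1023_PKT (5).
 */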
363
364 static uint16_t
365 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
366 {
367 struct vhost_queue *r = q;
368 uint16_t i, nb_rx = 0;
369 uint16_t nb_receive = nb_bufs;
370
371 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
372 return 0;
373
374 rte_atomic32_set(&r->while_queuing, 1);
375
376 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
377 goto out;
378
379 /* Dequeue packets from guest TX queue */
380 while (nb_receive) {
381 uint16_t nb_pkts;
382 uint16_t num = (uint16_t)RTE_MIN(nb_receive,
383 VHOST_MAX_PKT_BURST);
384
385 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
386 r->mb_pool, &bufs[nb_rx],
387 num);
388
389 nb_rx += nb_pkts;
390 nb_receive -= nb_pkts;
391 if (nb_pkts < num)
392 break;
393 }
394
395 r->stats.pkts += nb_rx;
396
397 for (i = 0; likely(i < nb_rx); i++) {
398 bufs[i]->port = r->port;
399 bufs[i]->vlan_tci = 0;
400
401 if (r->internal->vlan_strip)
402 rte_vlan_strip(bufs[i]);
403
404 r->stats.bytes += bufs[i]->pkt_len;
405 r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
406
407 vhost_update_single_packet_xstats(r, bufs[i]);
408 }
409
410 out:
411 rte_atomic32_set(&r->while_queuing, 0);
412
413 return nb_rx;
414 }
415
416 static uint16_t
417 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
418 {
419 struct vhost_queue *r = q;
420 uint16_t i, nb_tx = 0;
421 uint16_t nb_send = 0;
422 uint64_t nb_bytes = 0;
423 uint64_t nb_missed = 0;
424
425 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
426 return 0;
427
428 rte_atomic32_set(&r->while_queuing, 1);
429
430 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
431 goto out;
432
433 for (i = 0; i < nb_bufs; i++) {
434 struct rte_mbuf *m = bufs[i];
435
436 /* Do VLAN tag insertion */
437 if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
438 int error = rte_vlan_insert(&m);
439 if (unlikely(error)) {
440 rte_pktmbuf_free(m);
441 continue;
442 }
443 }
444
445 bufs[nb_send] = m;
446 ++nb_send;
447 }
448
449 /* Enqueue packets to guest RX queue */
450 while (nb_send) {
451 uint16_t nb_pkts;
452 uint16_t num = (uint16_t)RTE_MIN(nb_send,
453 VHOST_MAX_PKT_BURST);
454
455 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
456 &bufs[nb_tx], num);
457
458 nb_tx += nb_pkts;
459 nb_send -= nb_pkts;
460 if (nb_pkts < num)
461 break;
462 }
463
464 for (i = 0; likely(i < nb_tx); i++) {
465 nb_bytes += bufs[i]->pkt_len;
466 vhost_update_single_packet_xstats(r, bufs[i]);
467 }
468
469 nb_missed = nb_bufs - nb_tx;
470
471 r->stats.pkts += nb_tx;
472 r->stats.bytes += nb_bytes;
473 r->stats.missed_pkts += nb_missed;
474
475 r->stats.xstats[VHOST_BYTE] += nb_bytes;
476 r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
477 r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
478
479 /* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
480 * ifHCOutBroadcastPkts counters are increased when packets are not
481 * transmitted successfully.
482 */
483 for (i = nb_tx; i < nb_bufs; i++)
484 vhost_count_xcast_packets(r, bufs[i]);
485
486 for (i = 0; likely(i < nb_tx); i++)
487 rte_pktmbuf_free(bufs[i]);
488 out:
489 rte_atomic32_set(&r->while_queuing, 0);
490
491 return nb_tx;
492 }
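/*
 * Note on mbuf ownership: rte_vhost_enqueue_burst() copies packet data into
 * the guest's buffers, so the mbufs that were accepted (bufs[0..nb_tx-1]) are
 * freed here, while any rejected tail is left untouched and, per the usual
 * rte_eth_tx_burst() contract, remains the caller's responsibility.
 */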
493
494 static inline struct internal_list *
495 find_internal_resource(char *ifname)
496 {
497 int found = 0;
498 struct internal_list *list;
499 struct pmd_internal *internal;
500
501 if (ifname == NULL)
502 return NULL;
503
504 pthread_mutex_lock(&internal_list_lock);
505
506 TAILQ_FOREACH(list, &internal_list, next) {
507 internal = list->eth_dev->data->dev_private;
508 if (!strcmp(internal->iface_name, ifname)) {
509 found = 1;
510 break;
511 }
512 }
513
514 pthread_mutex_unlock(&internal_list_lock);
515
516 if (!found)
517 return NULL;
518
519 return list;
520 }
521
522 static int
523 eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
524 {
525 struct rte_intr_handle *handle = eth_dev->intr_handle;
526 struct rte_epoll_event rev, *elist;
527 int epfd, ret;
528
529 if (handle == NULL)
530 return 0;
531
532 elist = rte_intr_elist_index_get(handle, rxq_idx);
533 if (rte_intr_efds_index_get(handle, rxq_idx) == elist->fd)
534 return 0;
535
536 VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
537 rxq_idx);
538
539 if (elist->fd != -1)
540 VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
541 elist->fd);
542
543 /*
544 * First remove invalid epoll event, and then install
545 * the new one. May be solved with a proper API in the
546 * future.
547 */
548 epfd = elist->epfd;
549 rev = *elist;
550 ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
551 elist);
552 if (ret) {
553 VHOST_LOG(ERR, "Delete epoll event failed.\n");
554 return ret;
555 }
556
557 rev.fd = rte_intr_efds_index_get(handle, rxq_idx);
558 if (rte_intr_elist_index_set(handle, rxq_idx, rev))
559 return -rte_errno;
560
561 elist = rte_intr_elist_index_get(handle, rxq_idx);
562 ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, elist);
563 if (ret) {
564 VHOST_LOG(ERR, "Add epoll event failed.\n");
565 return ret;
566 }
567
568 return 0;
569 }
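/*
 * In the interrupt handle, efds[rxq_idx] holds the most recent kickfd
 * published by the vhost library (see vring_conf_update()), while
 * elist[rxq_idx] holds the fd currently registered with epoll; when they
 * differ, the stale event is deleted and the new fd is installed above.
 */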
570
571 static int
572 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
573 {
574 struct rte_vhost_vring vring;
575 struct vhost_queue *vq;
576 int old_intr_enable, ret = 0;
577
578 vq = dev->data->rx_queues[qid];
579 if (!vq) {
580 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
581 return -1;
582 }
583
584 rte_spinlock_lock(&vq->intr_lock);
585 old_intr_enable = vq->intr_enable;
586 vq->intr_enable = 1;
587 ret = eth_vhost_update_intr(dev, qid);
588 rte_spinlock_unlock(&vq->intr_lock);
589
590 if (ret < 0) {
591 VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
592 vq->intr_enable = old_intr_enable;
593 return ret;
594 }
595
596 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
597 if (ret < 0) {
598 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
599 return ret;
600 }
601 VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
602 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
603 rte_wmb();
604
605 return ret;
606 }
607
608 static int
609 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
610 {
611 struct rte_vhost_vring vring;
612 struct vhost_queue *vq;
613 int ret = 0;
614
615 vq = dev->data->rx_queues[qid];
616 if (!vq) {
617 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
618 return -1;
619 }
620
621 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
622 if (ret < 0) {
623 VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid);
624 return ret;
625 }
626 VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
627 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
628 rte_wmb();
629
630 vq->intr_enable = 0;
631
632 return 0;
633 }
634
635 static void
636 eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
637 {
638 struct rte_intr_handle *intr_handle = dev->intr_handle;
639
640 if (intr_handle != NULL) {
641 rte_intr_vec_list_free(intr_handle);
642 rte_intr_instance_free(intr_handle);
643 }
644 dev->intr_handle = NULL;
645 }
646
647 static int
648 eth_vhost_install_intr(struct rte_eth_dev *dev)
649 {
650 struct rte_vhost_vring vring;
651 struct vhost_queue *vq;
652 int nb_rxq = dev->data->nb_rx_queues;
653 int i;
654 int ret;
655
656 /* uninstall firstly if we are reconnecting */
657 if (dev->intr_handle != NULL)
658 eth_vhost_uninstall_intr(dev);
659
660 dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
661 if (dev->intr_handle == NULL) {
662 VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
663 return -ENOMEM;
664 }
665 if (rte_intr_efd_counter_size_set(dev->intr_handle, sizeof(uint64_t)))
666 return -rte_errno;
667
668 if (rte_intr_vec_list_alloc(dev->intr_handle, NULL, nb_rxq)) {
669 VHOST_LOG(ERR,
670 "Failed to allocate memory for interrupt vector\n");
671 rte_intr_instance_free(dev->intr_handle);
672 return -ENOMEM;
673 }
674
675
676 VHOST_LOG(INFO, "Prepare intr vec\n");
677 for (i = 0; i < nb_rxq; i++) {
678 if (rte_intr_vec_list_index_set(dev->intr_handle, i, RTE_INTR_VEC_RXTX_OFFSET + i))
679 return -rte_errno;
680 if (rte_intr_efds_index_set(dev->intr_handle, i, -1))
681 return -rte_errno;
682 vq = dev->data->rx_queues[i];
683 if (!vq) {
684 VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
685 continue;
686 }
687
688 ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
689 if (ret < 0) {
690 VHOST_LOG(INFO,
691 "Failed to get rxq-%d's vring, skip!\n", i);
692 continue;
693 }
694
695 if (vring.kickfd < 0) {
696 VHOST_LOG(INFO,
697 "rxq-%d's kickfd is invalid, skip!\n", i);
698 continue;
699 }
700
701 if (rte_intr_efds_index_set(dev->intr_handle, i, vring.kickfd))
702 continue;
703 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
704 }
705
706 if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq))
707 return -rte_errno;
708
709 if (rte_intr_max_intr_set(dev->intr_handle, nb_rxq + 1))
710 return -rte_errno;
711
712 if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VDEV))
713 return -rte_errno;
714
715 return 0;
716 }
717
718 static void
719 update_queuing_status(struct rte_eth_dev *dev)
720 {
721 struct pmd_internal *internal = dev->data->dev_private;
722 struct vhost_queue *vq;
723 struct rte_vhost_vring_state *state;
724 unsigned int i;
725 int allow_queuing = 1;
726
727 if (!dev->data->rx_queues || !dev->data->tx_queues)
728 return;
729
730 if (rte_atomic32_read(&internal->started) == 0 ||
731 rte_atomic32_read(&internal->dev_attached) == 0)
732 allow_queuing = 0;
733
734 state = vring_states[dev->data->port_id];
735
736 /* Wait until rx/tx_pkt_burst stops accessing vhost device */
737 for (i = 0; i < dev->data->nb_rx_queues; i++) {
738 vq = dev->data->rx_queues[i];
739 if (vq == NULL)
740 continue;
741 if (allow_queuing && state->cur[vq->virtqueue_id])
742 rte_atomic32_set(&vq->allow_queuing, 1);
743 else
744 rte_atomic32_set(&vq->allow_queuing, 0);
745 while (rte_atomic32_read(&vq->while_queuing))
746 rte_pause();
747 }
748
749 for (i = 0; i < dev->data->nb_tx_queues; i++) {
750 vq = dev->data->tx_queues[i];
751 if (vq == NULL)
752 continue;
753 if (allow_queuing && state->cur[vq->virtqueue_id])
754 rte_atomic32_set(&vq->allow_queuing, 1);
755 else
756 rte_atomic32_set(&vq->allow_queuing, 0);
757 while (rte_atomic32_read(&vq->while_queuing))
758 rte_pause();
759 }
760 }
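/*
 * Synchronization with the datapath: eth_vhost_rx()/eth_vhost_tx() first
 * check allow_queuing, set while_queuing, then re-check allow_queuing before
 * touching the vhost device.  Clearing allow_queuing here and spinning until
 * while_queuing drops to zero therefore guarantees that no burst function is
 * still inside rte_vhost_* calls when the device is detached or stopped.
 */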
761
762 static void
763 queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
764 {
765 struct vhost_queue *vq;
766 int i;
767
768 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
769 vq = eth_dev->data->rx_queues[i];
770 if (!vq)
771 continue;
772 vq->vid = internal->vid;
773 vq->internal = internal;
774 vq->port = eth_dev->data->port_id;
775 }
776 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
777 vq = eth_dev->data->tx_queues[i];
778 if (!vq)
779 continue;
780 vq->vid = internal->vid;
781 vq->internal = internal;
782 vq->port = eth_dev->data->port_id;
783 }
784 }
785
786 static int
787 new_device(int vid)
788 {
789 struct rte_eth_dev *eth_dev;
790 struct internal_list *list;
791 struct pmd_internal *internal;
792 struct rte_eth_conf *dev_conf;
793 unsigned i;
794 char ifname[PATH_MAX];
795 #ifdef RTE_LIBRTE_VHOST_NUMA
796 int newnode;
797 #endif
798
799 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
800 list = find_internal_resource(ifname);
801 if (list == NULL) {
802 VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
803 return -1;
804 }
805
806 eth_dev = list->eth_dev;
807 internal = eth_dev->data->dev_private;
808 dev_conf = &eth_dev->data->dev_conf;
809
810 #ifdef RTE_LIBRTE_VHOST_NUMA
811 newnode = rte_vhost_get_numa_node(vid);
812 if (newnode >= 0)
813 eth_dev->data->numa_node = newnode;
814 #endif
815
816 internal->vid = vid;
817 if (rte_atomic32_read(&internal->started) == 1) {
818 queue_setup(eth_dev, internal);
819
820 if (dev_conf->intr_conf.rxq) {
821 if (eth_vhost_install_intr(eth_dev) < 0) {
822 VHOST_LOG(INFO,
823 "Failed to install interrupt handler.");
824 return -1;
825 }
826 }
827 } else {
828 VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
829 }
830
831 for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
832 rte_vhost_enable_guest_notification(vid, i, 0);
833
834 rte_vhost_get_mtu(vid, &eth_dev->data->mtu);
835
836 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
837
838 rte_atomic32_set(&internal->dev_attached, 1);
839 update_queuing_status(eth_dev);
840
841 VHOST_LOG(INFO, "Vhost device %d created\n", vid);
842
843 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
844
845 return 0;
846 }
847
848 static void
849 destroy_device(int vid)
850 {
851 struct rte_eth_dev *eth_dev;
852 struct pmd_internal *internal;
853 struct vhost_queue *vq;
854 struct internal_list *list;
855 char ifname[PATH_MAX];
856 unsigned i;
857 struct rte_vhost_vring_state *state;
858
859 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
860 list = find_internal_resource(ifname);
861 if (list == NULL) {
862 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
863 return;
864 }
865 eth_dev = list->eth_dev;
866 internal = eth_dev->data->dev_private;
867
868 rte_atomic32_set(&internal->dev_attached, 0);
869 update_queuing_status(eth_dev);
870
871 eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
872
873 if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
874 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
875 vq = eth_dev->data->rx_queues[i];
876 if (!vq)
877 continue;
878 vq->vid = -1;
879 }
880 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
881 vq = eth_dev->data->tx_queues[i];
882 if (!vq)
883 continue;
884 vq->vid = -1;
885 }
886 }
887
888 state = vring_states[eth_dev->data->port_id];
889 rte_spinlock_lock(&state->lock);
890 for (i = 0; i <= state->max_vring; i++) {
891 state->cur[i] = false;
892 state->seen[i] = false;
893 }
894 state->max_vring = 0;
895 rte_spinlock_unlock(&state->lock);
896
897 VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
898 eth_vhost_uninstall_intr(eth_dev);
899
900 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
901 }
902
903 static int
904 vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
905 {
906 struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
907 struct pmd_internal *internal = eth_dev->data->dev_private;
908 struct vhost_queue *vq;
909 struct rte_vhost_vring vring;
910 int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
911 int ret = 0;
912
913 /*
914 * The vring kickfd may be changed after the new device notification.
915 * Update it when the vring state is updated.
916 */
917 if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
918 rte_atomic32_read(&internal->dev_attached) &&
919 rte_atomic32_read(&internal->started) &&
920 dev_conf->intr_conf.rxq) {
921 ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
922 if (ret) {
923 VHOST_LOG(ERR, "Failed to get vring %d information.\n",
924 vring_id);
925 return ret;
926 }
927
928 if (rte_intr_efds_index_set(eth_dev->intr_handle, rx_idx,
929 vring.kickfd))
930 return -rte_errno;
931
932 vq = eth_dev->data->rx_queues[rx_idx];
933 if (!vq) {
934 VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
935 return -1;
936 }
937
938 rte_spinlock_lock(&vq->intr_lock);
939 if (vq->intr_enable)
940 ret = eth_vhost_update_intr(eth_dev, rx_idx);
941 rte_spinlock_unlock(&vq->intr_lock);
942 }
943
944 return ret;
945 }
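/*
 * Only odd vring ids (guest TX rings) back an ethdev RX queue, hence
 * rx_idx = (vring_id - 1) / 2 above and -1 for even ids, for which no kickfd
 * or interrupt update is needed.
 */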
946
947 static int
948 vring_state_changed(int vid, uint16_t vring, int enable)
949 {
950 struct rte_vhost_vring_state *state;
951 struct rte_eth_dev *eth_dev;
952 struct internal_list *list;
953 char ifname[PATH_MAX];
954
955 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
956 list = find_internal_resource(ifname);
957 if (list == NULL) {
958 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
959 return -1;
960 }
961
962 eth_dev = list->eth_dev;
963 /* won't be NULL */
964 state = vring_states[eth_dev->data->port_id];
965
966 if (enable && vring_conf_update(vid, eth_dev, vring))
967 VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
968 (int)vring);
969
970 rte_spinlock_lock(&state->lock);
971 if (state->cur[vring] == enable) {
972 rte_spinlock_unlock(&state->lock);
973 return 0;
974 }
975 state->cur[vring] = enable;
976 state->max_vring = RTE_MAX(vring, state->max_vring);
977 rte_spinlock_unlock(&state->lock);
978
979 update_queuing_status(eth_dev);
980
981 VHOST_LOG(INFO, "vring%u is %s\n",
982 vring, enable ? "enabled" : "disabled");
983
984 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
985
986 return 0;
987 }
988
989 static struct rte_vhost_device_ops vhost_ops = {
990 .new_device = new_device,
991 .destroy_device = destroy_device,
992 .vring_state_changed = vring_state_changed,
993 };
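/*
 * These callbacks are invoked by the vhost-user library once the driver is
 * registered and started in vhost_driver_setup(): new_device() and
 * destroy_device() on frontend connect/disconnect, and vring_state_changed()
 * whenever the guest enables or disables a virtqueue.
 */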
994
995 static int
996 vhost_driver_setup(struct rte_eth_dev *eth_dev)
997 {
998 struct pmd_internal *internal = eth_dev->data->dev_private;
999 struct internal_list *list = NULL;
1000 struct rte_vhost_vring_state *vring_state = NULL;
1001 unsigned int numa_node = eth_dev->device->numa_node;
1002 const char *name = eth_dev->device->name;
1003
1004 /* Don't try to setup again if it has already been done. */
1005 list = find_internal_resource(internal->iface_name);
1006 if (list)
1007 return 0;
1008
1009 list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
1010 if (list == NULL)
1011 return -1;
1012
1013 vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
1014 0, numa_node);
1015 if (vring_state == NULL)
1016 goto free_list;
1017
1018 list->eth_dev = eth_dev;
1019 pthread_mutex_lock(&internal_list_lock);
1020 TAILQ_INSERT_TAIL(&internal_list, list, next);
1021 pthread_mutex_unlock(&internal_list_lock);
1022
1023 rte_spinlock_init(&vring_state->lock);
1024 vring_states[eth_dev->data->port_id] = vring_state;
1025
1026 if (rte_vhost_driver_register(internal->iface_name, internal->flags))
1027 goto list_remove;
1028
1029 if (internal->disable_flags) {
1030 if (rte_vhost_driver_disable_features(internal->iface_name,
1031 internal->disable_flags))
1032 goto drv_unreg;
1033 }
1034
1035 if (rte_vhost_driver_callback_register(internal->iface_name,
1036 &vhost_ops) < 0) {
1037 VHOST_LOG(ERR, "Can't register callbacks\n");
1038 goto drv_unreg;
1039 }
1040
1041 if (rte_vhost_driver_start(internal->iface_name) < 0) {
1042 VHOST_LOG(ERR, "Failed to start driver for %s\n",
1043 internal->iface_name);
1044 goto drv_unreg;
1045 }
1046
1047 return 0;
1048
1049 drv_unreg:
1050 rte_vhost_driver_unregister(internal->iface_name);
1051 list_remove:
1052 vring_states[eth_dev->data->port_id] = NULL;
1053 pthread_mutex_lock(&internal_list_lock);
1054 TAILQ_REMOVE(&internal_list, list, next);
1055 pthread_mutex_unlock(&internal_list_lock);
1056 rte_free(vring_state);
1057 free_list:
1058 rte_free(list);
1059
1060 return -1;
1061 }
1062
1063 int
1064 rte_eth_vhost_get_queue_event(uint16_t port_id,
1065 struct rte_eth_vhost_queue_event *event)
1066 {
1067 struct rte_vhost_vring_state *state;
1068 unsigned int i;
1069 int idx;
1070
1071 if (port_id >= RTE_MAX_ETHPORTS) {
1072 VHOST_LOG(ERR, "Invalid port id\n");
1073 return -1;
1074 }
1075
1076 state = vring_states[port_id];
1077 if (!state) {
1078 VHOST_LOG(ERR, "Unused port\n");
1079 return -1;
1080 }
1081
1082 rte_spinlock_lock(&state->lock);
1083 for (i = 0; i <= state->max_vring; i++) {
1084 idx = state->index++ % (state->max_vring + 1);
1085
1086 if (state->cur[idx] != state->seen[idx]) {
1087 state->seen[idx] = state->cur[idx];
1088 event->queue_id = idx / 2;
1089 event->rx = idx & 1;
1090 event->enable = state->cur[idx];
1091 rte_spinlock_unlock(&state->lock);
1092 return 0;
1093 }
1094 }
1095 rte_spinlock_unlock(&state->lock);
1096
1097 return -1;
1098 }
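/*
 * A typical (illustrative) consumer drains all pending transitions, e.g.
 * from an RTE_ETH_EVENT_QUEUE_STATE callback:
 *
 *	struct rte_eth_vhost_queue_event ev;
 *	while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
 *		printf("queue %u %s %s\n", ev.queue_id,
 *		       ev.rx ? "rx" : "tx",
 *		       ev.enable ? "enabled" : "disabled");
 */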
1099
1100 int
1101 rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
1102 {
1103 struct internal_list *list;
1104 struct rte_eth_dev *eth_dev;
1105 struct vhost_queue *vq;
1106 int vid = -1;
1107
1108 if (!rte_eth_dev_is_valid_port(port_id))
1109 return -1;
1110
1111 pthread_mutex_lock(&internal_list_lock);
1112
1113 TAILQ_FOREACH(list, &internal_list, next) {
1114 eth_dev = list->eth_dev;
1115 if (eth_dev->data->port_id == port_id) {
1116 vq = eth_dev->data->rx_queues[0];
1117 if (vq) {
1118 vid = vq->vid;
1119 }
1120 break;
1121 }
1122 }
1123
1124 pthread_mutex_unlock(&internal_list_lock);
1125
1126 return vid;
1127 }
1128
1129 static int
1130 eth_dev_configure(struct rte_eth_dev *dev)
1131 {
1132 struct pmd_internal *internal = dev->data->dev_private;
1133 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1134
1135 /* NOTE: the same process has to operate a vhost interface
1136 * from beginning to end (from eth_dev configure to eth_dev close).
1137 * It is user's responsibility at the moment.
1138 */
1139 if (vhost_driver_setup(dev) < 0)
1140 return -1;
1141
1142 internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
1143
1144 return 0;
1145 }
1146
1147 static int
1148 eth_dev_start(struct rte_eth_dev *eth_dev)
1149 {
1150 struct pmd_internal *internal = eth_dev->data->dev_private;
1151 struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
1152
1153 queue_setup(eth_dev, internal);
1154
1155 if (rte_atomic32_read(&internal->dev_attached) == 1) {
1156 if (dev_conf->intr_conf.rxq) {
1157 if (eth_vhost_install_intr(eth_dev) < 0) {
1158 VHOST_LOG(INFO,
1159 "Failed to install interrupt handler.");
1160 return -1;
1161 }
1162 }
1163 }
1164
1165 rte_atomic32_set(&internal->started, 1);
1166 update_queuing_status(eth_dev);
1167
1168 return 0;
1169 }
1170
1171 static int
1172 eth_dev_stop(struct rte_eth_dev *dev)
1173 {
1174 struct pmd_internal *internal = dev->data->dev_private;
1175
1176 dev->data->dev_started = 0;
1177 rte_atomic32_set(&internal->started, 0);
1178 update_queuing_status(dev);
1179
1180 return 0;
1181 }
1182
1183 static int
1184 eth_dev_close(struct rte_eth_dev *dev)
1185 {
1186 struct pmd_internal *internal;
1187 struct internal_list *list;
1188 unsigned int i, ret;
1189
1190 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1191 return 0;
1192
1193 internal = dev->data->dev_private;
1194 if (!internal)
1195 return 0;
1196
1197 ret = eth_dev_stop(dev);
1198
1199 list = find_internal_resource(internal->iface_name);
1200 if (list) {
1201 rte_vhost_driver_unregister(internal->iface_name);
1202 pthread_mutex_lock(&internal_list_lock);
1203 TAILQ_REMOVE(&internal_list, list, next);
1204 pthread_mutex_unlock(&internal_list_lock);
1205 rte_free(list);
1206 }
1207
1208 if (dev->data->rx_queues)
1209 for (i = 0; i < dev->data->nb_rx_queues; i++)
1210 rte_free(dev->data->rx_queues[i]);
1211
1212 if (dev->data->tx_queues)
1213 for (i = 0; i < dev->data->nb_tx_queues; i++)
1214 rte_free(dev->data->tx_queues[i]);
1215
1216 rte_free(internal->iface_name);
1217 rte_free(internal);
1218
1219 dev->data->dev_private = NULL;
1220
1221 rte_free(vring_states[dev->data->port_id]);
1222 vring_states[dev->data->port_id] = NULL;
1223
1224 return ret;
1225 }
1226
1227 static int
1228 eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1229 uint16_t nb_rx_desc __rte_unused,
1230 unsigned int socket_id,
1231 const struct rte_eth_rxconf *rx_conf __rte_unused,
1232 struct rte_mempool *mb_pool)
1233 {
1234 struct vhost_queue *vq;
1235
1236 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1237 RTE_CACHE_LINE_SIZE, socket_id);
1238 if (vq == NULL) {
1239 VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
1240 return -ENOMEM;
1241 }
1242
1243 vq->mb_pool = mb_pool;
1244 vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
1245 rte_spinlock_init(&vq->intr_lock);
1246 dev->data->rx_queues[rx_queue_id] = vq;
1247
1248 return 0;
1249 }
1250
1251 static int
1252 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1253 uint16_t nb_tx_desc __rte_unused,
1254 unsigned int socket_id,
1255 const struct rte_eth_txconf *tx_conf __rte_unused)
1256 {
1257 struct vhost_queue *vq;
1258
1259 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1260 RTE_CACHE_LINE_SIZE, socket_id);
1261 if (vq == NULL) {
1262 VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
1263 return -ENOMEM;
1264 }
1265
1266 vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
1267 rte_spinlock_init(&vq->intr_lock);
1268 dev->data->tx_queues[tx_queue_id] = vq;
1269
1270 return 0;
1271 }
1272
1273 static int
1274 eth_dev_info(struct rte_eth_dev *dev,
1275 struct rte_eth_dev_info *dev_info)
1276 {
1277 struct pmd_internal *internal;
1278
1279 internal = dev->data->dev_private;
1280 if (internal == NULL) {
1281 VHOST_LOG(ERR, "Invalid device specified\n");
1282 return -ENODEV;
1283 }
1284
1285 dev_info->max_mac_addrs = 1;
1286 dev_info->max_rx_pktlen = (uint32_t)-1;
1287 dev_info->max_rx_queues = internal->max_queues;
1288 dev_info->max_tx_queues = internal->max_queues;
1289 dev_info->min_rx_bufsize = 0;
1290
1291 dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
1292 RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
1293 dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
1294
1295 return 0;
1296 }
1297
1298 static int
1299 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1300 {
1301 unsigned i;
1302 unsigned long rx_total = 0, tx_total = 0;
1303 unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
1304 struct vhost_queue *vq;
1305
1306 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1307 i < dev->data->nb_rx_queues; i++) {
1308 if (dev->data->rx_queues[i] == NULL)
1309 continue;
1310 vq = dev->data->rx_queues[i];
1311 stats->q_ipackets[i] = vq->stats.pkts;
1312 rx_total += stats->q_ipackets[i];
1313
1314 stats->q_ibytes[i] = vq->stats.bytes;
1315 rx_total_bytes += stats->q_ibytes[i];
1316 }
1317
1318 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1319 i < dev->data->nb_tx_queues; i++) {
1320 if (dev->data->tx_queues[i] == NULL)
1321 continue;
1322 vq = dev->data->tx_queues[i];
1323 stats->q_opackets[i] = vq->stats.pkts;
1324 tx_total += stats->q_opackets[i];
1325
1326 stats->q_obytes[i] = vq->stats.bytes;
1327 tx_total_bytes += stats->q_obytes[i];
1328 }
1329
1330 stats->ipackets = rx_total;
1331 stats->opackets = tx_total;
1332 stats->ibytes = rx_total_bytes;
1333 stats->obytes = tx_total_bytes;
1334
1335 return 0;
1336 }
1337
1338 static int
1339 eth_stats_reset(struct rte_eth_dev *dev)
1340 {
1341 struct vhost_queue *vq;
1342 unsigned i;
1343
1344 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1345 if (dev->data->rx_queues[i] == NULL)
1346 continue;
1347 vq = dev->data->rx_queues[i];
1348 vq->stats.pkts = 0;
1349 vq->stats.bytes = 0;
1350 }
1351 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1352 if (dev->data->tx_queues[i] == NULL)
1353 continue;
1354 vq = dev->data->tx_queues[i];
1355 vq->stats.pkts = 0;
1356 vq->stats.bytes = 0;
1357 vq->stats.missed_pkts = 0;
1358 }
1359
1360 return 0;
1361 }
1362
1363 static void
1364 eth_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1365 {
1366 rte_free(dev->data->rx_queues[qid]);
1367 }
1368
1369 static void
1370 eth_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1371 {
1372 rte_free(dev->data->tx_queues[qid]);
1373 }
1374
1375 static int
1376 eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
1377 {
1378 /*
1379 * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data
1380 * and releases mbuf, so nothing to cleanup.
1381 */
1382 return 0;
1383 }
1384
1385 static int
1386 eth_link_update(struct rte_eth_dev *dev __rte_unused,
1387 int wait_to_complete __rte_unused)
1388 {
1389 return 0;
1390 }
1391
1392 static uint32_t
1393 eth_rx_queue_count(void *rx_queue)
1394 {
1395 struct vhost_queue *vq;
1396
1397 vq = rx_queue;
1398 if (vq == NULL)
1399 return 0;
1400
1401 return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
1402 }
1403
1404 #define CLB_VAL_IDX 0
1405 #define CLB_MSK_IDX 1
1406 #define CLB_MATCH_IDX 2
1407 static int
1408 vhost_monitor_callback(const uint64_t value,
1409 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
1410 {
1411 const uint64_t m = opaque[CLB_MSK_IDX];
1412 const uint64_t v = opaque[CLB_VAL_IDX];
1413 const uint64_t c = opaque[CLB_MATCH_IDX];
1414
1415 if (c)
1416 return (value & m) == v ? -1 : 0;
1417 else
1418 return (value & m) == v ? 0 : -1;
1419 }
1420
1421 static int
1422 vhost_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1423 {
1424 struct vhost_queue *vq = rx_queue;
1425 struct rte_vhost_power_monitor_cond vhost_pmc;
1426 int ret;
1427 if (vq == NULL)
1428 return -EINVAL;
1429 ret = rte_vhost_get_monitor_addr(vq->vid, vq->virtqueue_id,
1430 &vhost_pmc);
1431 if (ret < 0)
1432 return -EINVAL;
1433 pmc->addr = vhost_pmc.addr;
1434 pmc->opaque[CLB_VAL_IDX] = vhost_pmc.val;
1435 pmc->opaque[CLB_MSK_IDX] = vhost_pmc.mask;
1436 pmc->opaque[CLB_MATCH_IDX] = vhost_pmc.match;
1437 pmc->size = vhost_pmc.size;
1438 pmc->fn = vhost_monitor_callback;
1439
1440 return 0;
1441 }
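/*
 * The opaque[] slots filled above carry the expected value, mask and match
 * flag reported by rte_vhost_get_monitor_addr(); vhost_monitor_callback()
 * simply replays that masked comparison on the monitored word so the power
 * library can decide whether the ring has already changed state.
 */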
1442
1443 static const struct eth_dev_ops ops = {
1444 .dev_start = eth_dev_start,
1445 .dev_stop = eth_dev_stop,
1446 .dev_close = eth_dev_close,
1447 .dev_configure = eth_dev_configure,
1448 .dev_infos_get = eth_dev_info,
1449 .rx_queue_setup = eth_rx_queue_setup,
1450 .tx_queue_setup = eth_tx_queue_setup,
1451 .rx_queue_release = eth_rx_queue_release,
1452 .tx_queue_release = eth_tx_queue_release,
1453 .tx_done_cleanup = eth_tx_done_cleanup,
1454 .link_update = eth_link_update,
1455 .stats_get = eth_stats_get,
1456 .stats_reset = eth_stats_reset,
1457 .xstats_reset = vhost_dev_xstats_reset,
1458 .xstats_get = vhost_dev_xstats_get,
1459 .xstats_get_names = vhost_dev_xstats_get_names,
1460 .rx_queue_intr_enable = eth_rxq_intr_enable,
1461 .rx_queue_intr_disable = eth_rxq_intr_disable,
1462 .get_monitor_addr = vhost_get_monitor_addr,
1463 };
1464
1465 static int
1466 eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
1467 int16_t queues, const unsigned int numa_node, uint64_t flags,
1468 uint64_t disable_flags)
1469 {
1470 const char *name = rte_vdev_device_name(dev);
1471 struct rte_eth_dev_data *data;
1472 struct pmd_internal *internal = NULL;
1473 struct rte_eth_dev *eth_dev = NULL;
1474 struct rte_ether_addr *eth_addr = NULL;
1475
1476 VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
1477 numa_node);
1478
1479 /* reserve an ethdev entry */
1480 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
1481 if (eth_dev == NULL)
1482 goto error;
1483 data = eth_dev->data;
1484
1485 eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
1486 if (eth_addr == NULL)
1487 goto error;
1488 data->mac_addrs = eth_addr;
1489 *eth_addr = base_eth_addr;
1490 eth_addr->addr_bytes[5] = eth_dev->data->port_id;
1491
1492 /* now put it all together
1493 * - store queue data in internal,
1494 * - point eth_dev_data to internals
1495 * - and point eth_dev structure to new eth_dev_data structure
1496 */
1497 internal = eth_dev->data->dev_private;
1498 internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
1499 0, numa_node);
1500 if (internal->iface_name == NULL)
1501 goto error;
1502 strcpy(internal->iface_name, iface_name);
1503
1504 data->nb_rx_queues = queues;
1505 data->nb_tx_queues = queues;
1506 internal->max_queues = queues;
1507 internal->vid = -1;
1508 internal->flags = flags;
1509 internal->disable_flags = disable_flags;
1510 data->dev_link = pmd_link;
1511 data->dev_flags = RTE_ETH_DEV_INTR_LSC |
1512 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1513 data->promiscuous = 1;
1514 data->all_multicast = 1;
1515
1516 eth_dev->dev_ops = &ops;
1517 eth_dev->rx_queue_count = eth_rx_queue_count;
1518
1519 /* finally assign rx and tx ops */
1520 eth_dev->rx_pkt_burst = eth_vhost_rx;
1521 eth_dev->tx_pkt_burst = eth_vhost_tx;
1522
1523 rte_eth_dev_probing_finish(eth_dev);
1524 return 0;
1525
1526 error:
1527 if (internal)
1528 rte_free(internal->iface_name);
1529 rte_eth_dev_release_port(eth_dev);
1530
1531 return -1;
1532 }
1533
1534 static inline int
1535 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
1536 {
1537 const char **iface_name = extra_args;
1538
1539 if (value == NULL)
1540 return -1;
1541
1542 *iface_name = value;
1543
1544 return 0;
1545 }
1546
1547 static inline int
1548 open_int(const char *key __rte_unused, const char *value, void *extra_args)
1549 {
1550 uint16_t *n = extra_args;
1551
1552 if (value == NULL || extra_args == NULL)
1553 return -EINVAL;
1554
1555 *n = (uint16_t)strtoul(value, NULL, 0);
1556 if (*n == USHRT_MAX && errno == ERANGE)
1557 return -1;
1558
1559 return 0;
1560 }
1561
1562 static int
1563 rte_pmd_vhost_probe(struct rte_vdev_device *dev)
1564 {
1565 struct rte_kvargs *kvlist = NULL;
1566 int ret = 0;
1567 char *iface_name;
1568 uint16_t queues;
1569 uint64_t flags = 0;
1570 uint64_t disable_flags = 0;
1571 int client_mode = 0;
1572 int iommu_support = 0;
1573 int postcopy_support = 0;
1574 int tso = 0;
1575 int linear_buf = 0;
1576 int ext_buf = 0;
1577 struct rte_eth_dev *eth_dev;
1578 const char *name = rte_vdev_device_name(dev);
1579
1580 VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);
1581
1582 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1583 eth_dev = rte_eth_dev_attach_secondary(name);
1584 if (!eth_dev) {
1585 VHOST_LOG(ERR, "Failed to probe %s\n", name);
1586 return -1;
1587 }
1588 eth_dev->rx_pkt_burst = eth_vhost_rx;
1589 eth_dev->tx_pkt_burst = eth_vhost_tx;
1590 eth_dev->dev_ops = &ops;
1591 if (dev->device.numa_node == SOCKET_ID_ANY)
1592 dev->device.numa_node = rte_socket_id();
1593 eth_dev->device = &dev->device;
1594 rte_eth_dev_probing_finish(eth_dev);
1595 return 0;
1596 }
1597
1598 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
1599 if (kvlist == NULL)
1600 return -1;
1601
1602 if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
1603 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
1604 &open_iface, &iface_name);
1605 if (ret < 0)
1606 goto out_free;
1607 } else {
1608 ret = -1;
1609 goto out_free;
1610 }
1611
1612 if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
1613 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
1614 &open_int, &queues);
1615 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
1616 goto out_free;
1617
1618 } else
1619 queues = 1;
1620
1621 if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
1622 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
1623 &open_int, &client_mode);
1624 if (ret < 0)
1625 goto out_free;
1626
1627 if (client_mode)
1628 flags |= RTE_VHOST_USER_CLIENT;
1629 }
1630
1631 if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
1632 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
1633 &open_int, &iommu_support);
1634 if (ret < 0)
1635 goto out_free;
1636
1637 if (iommu_support)
1638 flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
1639 }
1640
1641 if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
1642 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
1643 &open_int, &postcopy_support);
1644 if (ret < 0)
1645 goto out_free;
1646
1647 if (postcopy_support)
1648 flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
1649 }
1650
1651 if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
1652 ret = rte_kvargs_process(kvlist,
1653 ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
1654 &open_int, &tso);
1655 if (ret < 0)
1656 goto out_free;
1657 }
1658
1659 if (tso == 0) {
1660 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
1661 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
1662 }
1663
1664 if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
1665 ret = rte_kvargs_process(kvlist,
1666 ETH_VHOST_LINEAR_BUF,
1667 &open_int, &linear_buf);
1668 if (ret < 0)
1669 goto out_free;
1670
1671 if (linear_buf == 1)
1672 flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
1673 }
1674
1675 if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
1676 ret = rte_kvargs_process(kvlist,
1677 ETH_VHOST_EXT_BUF,
1678 &open_int, &ext_buf);
1679 if (ret < 0)
1680 goto out_free;
1681
1682 if (ext_buf == 1)
1683 flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
1684 }
1685
1686 if (dev->device.numa_node == SOCKET_ID_ANY)
1687 dev->device.numa_node = rte_socket_id();
1688
1689 ret = eth_dev_vhost_create(dev, iface_name, queues,
1690 dev->device.numa_node, flags, disable_flags);
1691 if (ret == -1)
1692 VHOST_LOG(ERR, "Failed to create %s\n", name);
1693
1694 out_free:
1695 rte_kvargs_free(kvlist);
1696 return ret;
1697 }
1698
1699 static int
1700 rte_pmd_vhost_remove(struct rte_vdev_device *dev)
1701 {
1702 const char *name;
1703 struct rte_eth_dev *eth_dev = NULL;
1704
1705 name = rte_vdev_device_name(dev);
1706 VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);
1707
1708 /* find an ethdev entry */
1709 eth_dev = rte_eth_dev_allocated(name);
1710 if (eth_dev == NULL)
1711 return 0;
1712
1713 eth_dev_close(eth_dev);
1714 rte_eth_dev_release_port(eth_dev);
1715
1716 return 0;
1717 }
1718
1719 static struct rte_vdev_driver pmd_vhost_drv = {
1720 .probe = rte_pmd_vhost_probe,
1721 .remove = rte_pmd_vhost_remove,
1722 };
1723
1724 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
1725 RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
1726 RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
1727 "iface=<ifc> "
1728 "queues=<int> "
1729 "client=<0|1> "
1730 "iommu-support=<0|1> "
1731 "postcopy-support=<0|1> "
1732 "tso=<0|1> "
1733 "linear-buffer=<0|1> "
1734 "ext-buffer=<0|1>");
1735