xref: /f-stack/dpdk/examples/vhost/virtio_net.c (revision d30ea906)
1*d30ea906Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause
2*d30ea906Sjfb8856606  * Copyright(c) 2010-2017 Intel Corporation
32bfe3f2eSlogwang  */
42bfe3f2eSlogwang 
52bfe3f2eSlogwang #include <stdint.h>
62bfe3f2eSlogwang #include <stdbool.h>
72bfe3f2eSlogwang #include <linux/virtio_net.h>
82bfe3f2eSlogwang 
92bfe3f2eSlogwang #include <rte_mbuf.h>
102bfe3f2eSlogwang #include <rte_memcpy.h>
112bfe3f2eSlogwang #include <rte_vhost.h>
122bfe3f2eSlogwang 
132bfe3f2eSlogwang #include "main.h"
142bfe3f2eSlogwang 
152bfe3f2eSlogwang /*
162bfe3f2eSlogwang  * A very simple vhost-user net driver implementation, without
172bfe3f2eSlogwang  * any extra features being enabled, such as TSO and mrg-Rx.
182bfe3f2eSlogwang  */
192bfe3f2eSlogwang 
202bfe3f2eSlogwang void
vs_vhost_net_setup(struct vhost_dev * dev)212bfe3f2eSlogwang vs_vhost_net_setup(struct vhost_dev *dev)
222bfe3f2eSlogwang {
232bfe3f2eSlogwang 	uint16_t i;
242bfe3f2eSlogwang 	int vid = dev->vid;
252bfe3f2eSlogwang 	struct vhost_queue *queue;
262bfe3f2eSlogwang 
272bfe3f2eSlogwang 	RTE_LOG(INFO, VHOST_CONFIG,
282bfe3f2eSlogwang 		"setting builtin vhost-user net driver\n");
292bfe3f2eSlogwang 
302bfe3f2eSlogwang 	rte_vhost_get_negotiated_features(vid, &dev->features);
312bfe3f2eSlogwang 	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
322bfe3f2eSlogwang 		dev->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
332bfe3f2eSlogwang 	else
342bfe3f2eSlogwang 		dev->hdr_len = sizeof(struct virtio_net_hdr);
352bfe3f2eSlogwang 
362bfe3f2eSlogwang 	rte_vhost_get_mem_table(vid, &dev->mem);
372bfe3f2eSlogwang 
382bfe3f2eSlogwang 	dev->nr_vrings = rte_vhost_get_vring_num(vid);
392bfe3f2eSlogwang 	for (i = 0; i < dev->nr_vrings; i++) {
402bfe3f2eSlogwang 		queue = &dev->queues[i];
412bfe3f2eSlogwang 
422bfe3f2eSlogwang 		queue->last_used_idx  = 0;
432bfe3f2eSlogwang 		queue->last_avail_idx = 0;
442bfe3f2eSlogwang 		rte_vhost_get_vhost_vring(vid, i, &queue->vr);
452bfe3f2eSlogwang 	}
462bfe3f2eSlogwang }
472bfe3f2eSlogwang 
482bfe3f2eSlogwang void
vs_vhost_net_remove(struct vhost_dev * dev)492bfe3f2eSlogwang vs_vhost_net_remove(struct vhost_dev *dev)
502bfe3f2eSlogwang {
512bfe3f2eSlogwang 	free(dev->mem);
522bfe3f2eSlogwang }
532bfe3f2eSlogwang 
542bfe3f2eSlogwang static __rte_always_inline int
enqueue_pkt(struct vhost_dev * dev,struct rte_vhost_vring * vr,struct rte_mbuf * m,uint16_t desc_idx)552bfe3f2eSlogwang enqueue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
562bfe3f2eSlogwang 	    struct rte_mbuf *m, uint16_t desc_idx)
572bfe3f2eSlogwang {
582bfe3f2eSlogwang 	uint32_t desc_avail, desc_offset;
592bfe3f2eSlogwang 	uint64_t desc_chunck_len;
602bfe3f2eSlogwang 	uint32_t mbuf_avail, mbuf_offset;
612bfe3f2eSlogwang 	uint32_t cpy_len;
622bfe3f2eSlogwang 	struct vring_desc *desc;
632bfe3f2eSlogwang 	uint64_t desc_addr, desc_gaddr;
642bfe3f2eSlogwang 	struct virtio_net_hdr virtio_hdr = {0, 0, 0, 0, 0, 0};
652bfe3f2eSlogwang 	/* A counter to avoid desc dead loop chain */
662bfe3f2eSlogwang 	uint16_t nr_desc = 1;
672bfe3f2eSlogwang 
682bfe3f2eSlogwang 	desc = &vr->desc[desc_idx];
692bfe3f2eSlogwang 	desc_chunck_len = desc->len;
702bfe3f2eSlogwang 	desc_gaddr = desc->addr;
712bfe3f2eSlogwang 	desc_addr = rte_vhost_va_from_guest_pa(
722bfe3f2eSlogwang 			dev->mem, desc_gaddr, &desc_chunck_len);
732bfe3f2eSlogwang 	/*
742bfe3f2eSlogwang 	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
752bfe3f2eSlogwang 	 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
762bfe3f2eSlogwang 	 * otherwise stores offset on the stack instead of in a register.
772bfe3f2eSlogwang 	 */
782bfe3f2eSlogwang 	if (unlikely(desc->len < dev->hdr_len) || !desc_addr)
792bfe3f2eSlogwang 		return -1;
802bfe3f2eSlogwang 
812bfe3f2eSlogwang 	rte_prefetch0((void *)(uintptr_t)desc_addr);
822bfe3f2eSlogwang 
832bfe3f2eSlogwang 	/* write virtio-net header */
842bfe3f2eSlogwang 	if (likely(desc_chunck_len >= dev->hdr_len)) {
852bfe3f2eSlogwang 		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr;
862bfe3f2eSlogwang 		desc_offset = dev->hdr_len;
872bfe3f2eSlogwang 	} else {
882bfe3f2eSlogwang 		uint64_t len;
892bfe3f2eSlogwang 		uint64_t remain = dev->hdr_len;
902bfe3f2eSlogwang 		uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst;
912bfe3f2eSlogwang 		uint64_t guest_addr = desc_gaddr;
922bfe3f2eSlogwang 
932bfe3f2eSlogwang 		while (remain) {
942bfe3f2eSlogwang 			len = remain;
952bfe3f2eSlogwang 			dst = rte_vhost_va_from_guest_pa(dev->mem,
962bfe3f2eSlogwang 					guest_addr, &len);
972bfe3f2eSlogwang 			if (unlikely(!dst || !len))
982bfe3f2eSlogwang 				return -1;
992bfe3f2eSlogwang 
1002bfe3f2eSlogwang 			rte_memcpy((void *)(uintptr_t)dst,
1012bfe3f2eSlogwang 					(void *)(uintptr_t)src,
1022bfe3f2eSlogwang 					len);
1032bfe3f2eSlogwang 
1042bfe3f2eSlogwang 			remain -= len;
1052bfe3f2eSlogwang 			guest_addr += len;
106*d30ea906Sjfb8856606 			src += len;
1072bfe3f2eSlogwang 		}
1082bfe3f2eSlogwang 
1092bfe3f2eSlogwang 		desc_chunck_len = desc->len - dev->hdr_len;
1102bfe3f2eSlogwang 		desc_gaddr += dev->hdr_len;
1112bfe3f2eSlogwang 		desc_addr = rte_vhost_va_from_guest_pa(
1122bfe3f2eSlogwang 				dev->mem, desc_gaddr,
1132bfe3f2eSlogwang 				&desc_chunck_len);
1142bfe3f2eSlogwang 		if (unlikely(!desc_addr))
1152bfe3f2eSlogwang 			return -1;
1162bfe3f2eSlogwang 
1172bfe3f2eSlogwang 		desc_offset = 0;
1182bfe3f2eSlogwang 	}
1192bfe3f2eSlogwang 
1202bfe3f2eSlogwang 	desc_avail  = desc->len - dev->hdr_len;
1212bfe3f2eSlogwang 
1222bfe3f2eSlogwang 	mbuf_avail  = rte_pktmbuf_data_len(m);
1232bfe3f2eSlogwang 	mbuf_offset = 0;
1242bfe3f2eSlogwang 	while (mbuf_avail != 0 || m->next != NULL) {
1252bfe3f2eSlogwang 		/* done with current mbuf, fetch next */
1262bfe3f2eSlogwang 		if (mbuf_avail == 0) {
1272bfe3f2eSlogwang 			m = m->next;
1282bfe3f2eSlogwang 
1292bfe3f2eSlogwang 			mbuf_offset = 0;
1302bfe3f2eSlogwang 			mbuf_avail  = rte_pktmbuf_data_len(m);
1312bfe3f2eSlogwang 		}
1322bfe3f2eSlogwang 
1332bfe3f2eSlogwang 		/* done with current desc buf, fetch next */
1342bfe3f2eSlogwang 		if (desc_avail == 0) {
1352bfe3f2eSlogwang 			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
1362bfe3f2eSlogwang 				/* Room in vring buffer is not enough */
1372bfe3f2eSlogwang 				return -1;
1382bfe3f2eSlogwang 			}
1392bfe3f2eSlogwang 			if (unlikely(desc->next >= vr->size ||
1402bfe3f2eSlogwang 				     ++nr_desc > vr->size))
1412bfe3f2eSlogwang 				return -1;
1422bfe3f2eSlogwang 
1432bfe3f2eSlogwang 			desc = &vr->desc[desc->next];
1442bfe3f2eSlogwang 			desc_chunck_len = desc->len;
1452bfe3f2eSlogwang 			desc_gaddr = desc->addr;
1462bfe3f2eSlogwang 			desc_addr = rte_vhost_va_from_guest_pa(
1472bfe3f2eSlogwang 					dev->mem, desc_gaddr, &desc_chunck_len);
1482bfe3f2eSlogwang 			if (unlikely(!desc_addr))
1492bfe3f2eSlogwang 				return -1;
1502bfe3f2eSlogwang 
1512bfe3f2eSlogwang 			desc_offset = 0;
1522bfe3f2eSlogwang 			desc_avail  = desc->len;
1532bfe3f2eSlogwang 		} else if (unlikely(desc_chunck_len == 0)) {
1542bfe3f2eSlogwang 			desc_chunck_len = desc_avail;
1552bfe3f2eSlogwang 			desc_gaddr += desc_offset;
1562bfe3f2eSlogwang 			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
1572bfe3f2eSlogwang 					desc_gaddr,
1582bfe3f2eSlogwang 					&desc_chunck_len);
1592bfe3f2eSlogwang 			if (unlikely(!desc_addr))
1602bfe3f2eSlogwang 				return -1;
1612bfe3f2eSlogwang 
1622bfe3f2eSlogwang 			desc_offset = 0;
1632bfe3f2eSlogwang 		}
1642bfe3f2eSlogwang 
1652bfe3f2eSlogwang 		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
1662bfe3f2eSlogwang 		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
1672bfe3f2eSlogwang 			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1682bfe3f2eSlogwang 			cpy_len);
1692bfe3f2eSlogwang 
1702bfe3f2eSlogwang 		mbuf_avail  -= cpy_len;
1712bfe3f2eSlogwang 		mbuf_offset += cpy_len;
1722bfe3f2eSlogwang 		desc_avail  -= cpy_len;
1732bfe3f2eSlogwang 		desc_offset += cpy_len;
1742bfe3f2eSlogwang 		desc_chunck_len -= cpy_len;
1752bfe3f2eSlogwang 	}
1762bfe3f2eSlogwang 
1772bfe3f2eSlogwang 	return 0;
1782bfe3f2eSlogwang }
1792bfe3f2eSlogwang 
1802bfe3f2eSlogwang uint16_t
vs_enqueue_pkts(struct vhost_dev * dev,uint16_t queue_id,struct rte_mbuf ** pkts,uint32_t count)1812bfe3f2eSlogwang vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
1822bfe3f2eSlogwang 		struct rte_mbuf **pkts, uint32_t count)
1832bfe3f2eSlogwang {
1842bfe3f2eSlogwang 	struct vhost_queue *queue;
1852bfe3f2eSlogwang 	struct rte_vhost_vring *vr;
1862bfe3f2eSlogwang 	uint16_t avail_idx, free_entries, start_idx;
1872bfe3f2eSlogwang 	uint16_t desc_indexes[MAX_PKT_BURST];
1882bfe3f2eSlogwang 	uint16_t used_idx;
1892bfe3f2eSlogwang 	uint32_t i;
1902bfe3f2eSlogwang 
1912bfe3f2eSlogwang 	queue = &dev->queues[queue_id];
1922bfe3f2eSlogwang 	vr    = &queue->vr;
1932bfe3f2eSlogwang 
1942bfe3f2eSlogwang 	avail_idx = *((volatile uint16_t *)&vr->avail->idx);
1952bfe3f2eSlogwang 	start_idx = queue->last_used_idx;
1962bfe3f2eSlogwang 	free_entries = avail_idx - start_idx;
1972bfe3f2eSlogwang 	count = RTE_MIN(count, free_entries);
1982bfe3f2eSlogwang 	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
1992bfe3f2eSlogwang 	if (count == 0)
2002bfe3f2eSlogwang 		return 0;
2012bfe3f2eSlogwang 
2022bfe3f2eSlogwang 	/* Retrieve all of the desc indexes first to avoid caching issues. */
2032bfe3f2eSlogwang 	rte_prefetch0(&vr->avail->ring[start_idx & (vr->size - 1)]);
2042bfe3f2eSlogwang 	for (i = 0; i < count; i++) {
2052bfe3f2eSlogwang 		used_idx = (start_idx + i) & (vr->size - 1);
2062bfe3f2eSlogwang 		desc_indexes[i] = vr->avail->ring[used_idx];
2072bfe3f2eSlogwang 		vr->used->ring[used_idx].id = desc_indexes[i];
2082bfe3f2eSlogwang 		vr->used->ring[used_idx].len = pkts[i]->pkt_len +
2092bfe3f2eSlogwang 					       dev->hdr_len;
2102bfe3f2eSlogwang 	}
2112bfe3f2eSlogwang 
2122bfe3f2eSlogwang 	rte_prefetch0(&vr->desc[desc_indexes[0]]);
2132bfe3f2eSlogwang 	for (i = 0; i < count; i++) {
2142bfe3f2eSlogwang 		uint16_t desc_idx = desc_indexes[i];
2152bfe3f2eSlogwang 		int err;
2162bfe3f2eSlogwang 
2172bfe3f2eSlogwang 		err = enqueue_pkt(dev, vr, pkts[i], desc_idx);
2182bfe3f2eSlogwang 		if (unlikely(err)) {
2192bfe3f2eSlogwang 			used_idx = (start_idx + i) & (vr->size - 1);
2202bfe3f2eSlogwang 			vr->used->ring[used_idx].len = dev->hdr_len;
2212bfe3f2eSlogwang 		}
2222bfe3f2eSlogwang 
2232bfe3f2eSlogwang 		if (i + 1 < count)
2242bfe3f2eSlogwang 			rte_prefetch0(&vr->desc[desc_indexes[i+1]]);
2252bfe3f2eSlogwang 	}
2262bfe3f2eSlogwang 
2272bfe3f2eSlogwang 	rte_smp_wmb();
2282bfe3f2eSlogwang 
2292bfe3f2eSlogwang 	*(volatile uint16_t *)&vr->used->idx += count;
2302bfe3f2eSlogwang 	queue->last_used_idx += count;
2312bfe3f2eSlogwang 
232*d30ea906Sjfb8856606 	rte_vhost_vring_call(dev->vid, queue_id);
2332bfe3f2eSlogwang 
2342bfe3f2eSlogwang 	return count;
2352bfe3f2eSlogwang }
2362bfe3f2eSlogwang 
2372bfe3f2eSlogwang static __rte_always_inline int
dequeue_pkt(struct vhost_dev * dev,struct rte_vhost_vring * vr,struct rte_mbuf * m,uint16_t desc_idx,struct rte_mempool * mbuf_pool)2382bfe3f2eSlogwang dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
2392bfe3f2eSlogwang 	    struct rte_mbuf *m, uint16_t desc_idx,
2402bfe3f2eSlogwang 	    struct rte_mempool *mbuf_pool)
2412bfe3f2eSlogwang {
2422bfe3f2eSlogwang 	struct vring_desc *desc;
2432bfe3f2eSlogwang 	uint64_t desc_addr, desc_gaddr;
2442bfe3f2eSlogwang 	uint32_t desc_avail, desc_offset;
2452bfe3f2eSlogwang 	uint64_t desc_chunck_len;
2462bfe3f2eSlogwang 	uint32_t mbuf_avail, mbuf_offset;
2472bfe3f2eSlogwang 	uint32_t cpy_len;
2482bfe3f2eSlogwang 	struct rte_mbuf *cur = m, *prev = m;
2492bfe3f2eSlogwang 	/* A counter to avoid desc dead loop chain */
2502bfe3f2eSlogwang 	uint32_t nr_desc = 1;
2512bfe3f2eSlogwang 
2522bfe3f2eSlogwang 	desc = &vr->desc[desc_idx];
2532bfe3f2eSlogwang 	if (unlikely((desc->len < dev->hdr_len)) ||
2542bfe3f2eSlogwang 			(desc->flags & VRING_DESC_F_INDIRECT))
2552bfe3f2eSlogwang 		return -1;
2562bfe3f2eSlogwang 
2572bfe3f2eSlogwang 	desc_chunck_len = desc->len;
2582bfe3f2eSlogwang 	desc_gaddr = desc->addr;
2592bfe3f2eSlogwang 	desc_addr = rte_vhost_va_from_guest_pa(
2602bfe3f2eSlogwang 			dev->mem, desc_gaddr, &desc_chunck_len);
2612bfe3f2eSlogwang 	if (unlikely(!desc_addr))
2622bfe3f2eSlogwang 		return -1;
2632bfe3f2eSlogwang 
2642bfe3f2eSlogwang 	/*
2652bfe3f2eSlogwang 	 * We don't support ANY_LAYOUT, neither VERSION_1, meaning
2662bfe3f2eSlogwang 	 * a Tx packet from guest must have 2 desc buffers at least:
2672bfe3f2eSlogwang 	 * the first for storing the header and the others for
2682bfe3f2eSlogwang 	 * storing the data.
2692bfe3f2eSlogwang 	 *
2702bfe3f2eSlogwang 	 * And since we don't support TSO, we could simply skip the
2712bfe3f2eSlogwang 	 * header.
2722bfe3f2eSlogwang 	 */
2732bfe3f2eSlogwang 	desc = &vr->desc[desc->next];
2742bfe3f2eSlogwang 	desc_chunck_len = desc->len;
2752bfe3f2eSlogwang 	desc_gaddr = desc->addr;
2762bfe3f2eSlogwang 	desc_addr = rte_vhost_va_from_guest_pa(
2772bfe3f2eSlogwang 			dev->mem, desc_gaddr, &desc_chunck_len);
2782bfe3f2eSlogwang 	if (unlikely(!desc_addr))
2792bfe3f2eSlogwang 		return -1;
2802bfe3f2eSlogwang 	rte_prefetch0((void *)(uintptr_t)desc_addr);
2812bfe3f2eSlogwang 
2822bfe3f2eSlogwang 	desc_offset = 0;
2832bfe3f2eSlogwang 	desc_avail  = desc->len;
2842bfe3f2eSlogwang 	nr_desc    += 1;
2852bfe3f2eSlogwang 
2862bfe3f2eSlogwang 	mbuf_offset = 0;
2872bfe3f2eSlogwang 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
2882bfe3f2eSlogwang 	while (1) {
2892bfe3f2eSlogwang 		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
2902bfe3f2eSlogwang 		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
2912bfe3f2eSlogwang 						   mbuf_offset),
2922bfe3f2eSlogwang 			(void *)((uintptr_t)(desc_addr + desc_offset)),
2932bfe3f2eSlogwang 			cpy_len);
2942bfe3f2eSlogwang 
2952bfe3f2eSlogwang 		mbuf_avail  -= cpy_len;
2962bfe3f2eSlogwang 		mbuf_offset += cpy_len;
2972bfe3f2eSlogwang 		desc_avail  -= cpy_len;
2982bfe3f2eSlogwang 		desc_offset += cpy_len;
2992bfe3f2eSlogwang 		desc_chunck_len -= cpy_len;
3002bfe3f2eSlogwang 
3012bfe3f2eSlogwang 		/* This desc reaches to its end, get the next one */
3022bfe3f2eSlogwang 		if (desc_avail == 0) {
3032bfe3f2eSlogwang 			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
3042bfe3f2eSlogwang 				break;
3052bfe3f2eSlogwang 
3062bfe3f2eSlogwang 			if (unlikely(desc->next >= vr->size ||
3072bfe3f2eSlogwang 				     ++nr_desc > vr->size))
3082bfe3f2eSlogwang 				return -1;
3092bfe3f2eSlogwang 			desc = &vr->desc[desc->next];
3102bfe3f2eSlogwang 
3112bfe3f2eSlogwang 			desc_chunck_len = desc->len;
3122bfe3f2eSlogwang 			desc_gaddr = desc->addr;
3132bfe3f2eSlogwang 			desc_addr = rte_vhost_va_from_guest_pa(
3142bfe3f2eSlogwang 					dev->mem, desc_gaddr, &desc_chunck_len);
3152bfe3f2eSlogwang 			if (unlikely(!desc_addr))
3162bfe3f2eSlogwang 				return -1;
3172bfe3f2eSlogwang 			rte_prefetch0((void *)(uintptr_t)desc_addr);
3182bfe3f2eSlogwang 
3192bfe3f2eSlogwang 			desc_offset = 0;
3202bfe3f2eSlogwang 			desc_avail  = desc->len;
3212bfe3f2eSlogwang 		} else if (unlikely(desc_chunck_len == 0)) {
3222bfe3f2eSlogwang 			desc_chunck_len = desc_avail;
3232bfe3f2eSlogwang 			desc_gaddr += desc_offset;
3242bfe3f2eSlogwang 			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
3252bfe3f2eSlogwang 					desc_gaddr,
3262bfe3f2eSlogwang 					&desc_chunck_len);
3272bfe3f2eSlogwang 			if (unlikely(!desc_addr))
3282bfe3f2eSlogwang 				return -1;
3292bfe3f2eSlogwang 
3302bfe3f2eSlogwang 			desc_offset = 0;
3312bfe3f2eSlogwang 		}
3322bfe3f2eSlogwang 
3332bfe3f2eSlogwang 		/*
3342bfe3f2eSlogwang 		 * This mbuf reaches to its end, get a new one
3352bfe3f2eSlogwang 		 * to hold more data.
3362bfe3f2eSlogwang 		 */
3372bfe3f2eSlogwang 		if (mbuf_avail == 0) {
3382bfe3f2eSlogwang 			cur = rte_pktmbuf_alloc(mbuf_pool);
3392bfe3f2eSlogwang 			if (unlikely(cur == NULL)) {
3402bfe3f2eSlogwang 				RTE_LOG(ERR, VHOST_DATA, "Failed to "
3412bfe3f2eSlogwang 					"allocate memory for mbuf.\n");
3422bfe3f2eSlogwang 				return -1;
3432bfe3f2eSlogwang 			}
3442bfe3f2eSlogwang 
3452bfe3f2eSlogwang 			prev->next = cur;
3462bfe3f2eSlogwang 			prev->data_len = mbuf_offset;
3472bfe3f2eSlogwang 			m->nb_segs += 1;
3482bfe3f2eSlogwang 			m->pkt_len += mbuf_offset;
3492bfe3f2eSlogwang 			prev = cur;
3502bfe3f2eSlogwang 
3512bfe3f2eSlogwang 			mbuf_offset = 0;
3522bfe3f2eSlogwang 			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
3532bfe3f2eSlogwang 		}
3542bfe3f2eSlogwang 	}
3552bfe3f2eSlogwang 
3562bfe3f2eSlogwang 	prev->data_len = mbuf_offset;
3572bfe3f2eSlogwang 	m->pkt_len    += mbuf_offset;
3582bfe3f2eSlogwang 
3592bfe3f2eSlogwang 	return 0;
3602bfe3f2eSlogwang }
3612bfe3f2eSlogwang 
3622bfe3f2eSlogwang uint16_t
vs_dequeue_pkts(struct vhost_dev * dev,uint16_t queue_id,struct rte_mempool * mbuf_pool,struct rte_mbuf ** pkts,uint16_t count)3632bfe3f2eSlogwang vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
3642bfe3f2eSlogwang 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
3652bfe3f2eSlogwang {
3662bfe3f2eSlogwang 	struct vhost_queue *queue;
3672bfe3f2eSlogwang 	struct rte_vhost_vring *vr;
3682bfe3f2eSlogwang 	uint32_t desc_indexes[MAX_PKT_BURST];
3692bfe3f2eSlogwang 	uint32_t used_idx;
3702bfe3f2eSlogwang 	uint32_t i = 0;
3712bfe3f2eSlogwang 	uint16_t free_entries;
3722bfe3f2eSlogwang 	uint16_t avail_idx;
3732bfe3f2eSlogwang 
3742bfe3f2eSlogwang 	queue = &dev->queues[queue_id];
3752bfe3f2eSlogwang 	vr    = &queue->vr;
3762bfe3f2eSlogwang 
3772bfe3f2eSlogwang 	free_entries = *((volatile uint16_t *)&vr->avail->idx) -
3782bfe3f2eSlogwang 			queue->last_avail_idx;
3792bfe3f2eSlogwang 	if (free_entries == 0)
3802bfe3f2eSlogwang 		return 0;
3812bfe3f2eSlogwang 
3822bfe3f2eSlogwang 	/* Prefetch available and used ring */
3832bfe3f2eSlogwang 	avail_idx = queue->last_avail_idx & (vr->size - 1);
3842bfe3f2eSlogwang 	used_idx  = queue->last_used_idx  & (vr->size - 1);
3852bfe3f2eSlogwang 	rte_prefetch0(&vr->avail->ring[avail_idx]);
3862bfe3f2eSlogwang 	rte_prefetch0(&vr->used->ring[used_idx]);
3872bfe3f2eSlogwang 
3882bfe3f2eSlogwang 	count = RTE_MIN(count, MAX_PKT_BURST);
3892bfe3f2eSlogwang 	count = RTE_MIN(count, free_entries);
3902bfe3f2eSlogwang 
3912bfe3f2eSlogwang 	if (unlikely(count == 0))
3922bfe3f2eSlogwang 		return 0;
3932bfe3f2eSlogwang 
3942bfe3f2eSlogwang 	/*
3952bfe3f2eSlogwang 	 * Retrieve all of the head indexes first and pre-update used entries
3962bfe3f2eSlogwang 	 * to avoid caching issues.
3972bfe3f2eSlogwang 	 */
3982bfe3f2eSlogwang 	for (i = 0; i < count; i++) {
3992bfe3f2eSlogwang 		avail_idx = (queue->last_avail_idx + i) & (vr->size - 1);
4002bfe3f2eSlogwang 		used_idx  = (queue->last_used_idx  + i) & (vr->size - 1);
4012bfe3f2eSlogwang 		desc_indexes[i] = vr->avail->ring[avail_idx];
4022bfe3f2eSlogwang 
4032bfe3f2eSlogwang 		vr->used->ring[used_idx].id  = desc_indexes[i];
4042bfe3f2eSlogwang 		vr->used->ring[used_idx].len = 0;
4052bfe3f2eSlogwang 	}
4062bfe3f2eSlogwang 
4072bfe3f2eSlogwang 	/* Prefetch descriptor index. */
4082bfe3f2eSlogwang 	rte_prefetch0(&vr->desc[desc_indexes[0]]);
4092bfe3f2eSlogwang 	for (i = 0; i < count; i++) {
4102bfe3f2eSlogwang 		int err;
4112bfe3f2eSlogwang 
4122bfe3f2eSlogwang 		if (likely(i + 1 < count))
4132bfe3f2eSlogwang 			rte_prefetch0(&vr->desc[desc_indexes[i + 1]]);
4142bfe3f2eSlogwang 
4152bfe3f2eSlogwang 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
4162bfe3f2eSlogwang 		if (unlikely(pkts[i] == NULL)) {
4172bfe3f2eSlogwang 			RTE_LOG(ERR, VHOST_DATA,
4182bfe3f2eSlogwang 				"Failed to allocate memory for mbuf.\n");
4192bfe3f2eSlogwang 			break;
4202bfe3f2eSlogwang 		}
4212bfe3f2eSlogwang 
4222bfe3f2eSlogwang 		err = dequeue_pkt(dev, vr, pkts[i], desc_indexes[i], mbuf_pool);
4232bfe3f2eSlogwang 		if (unlikely(err)) {
4242bfe3f2eSlogwang 			rte_pktmbuf_free(pkts[i]);
4252bfe3f2eSlogwang 			break;
4262bfe3f2eSlogwang 		}
4272bfe3f2eSlogwang 
4282bfe3f2eSlogwang 	}
4292bfe3f2eSlogwang 
4302bfe3f2eSlogwang 	queue->last_avail_idx += i;
4312bfe3f2eSlogwang 	queue->last_used_idx += i;
4322bfe3f2eSlogwang 	rte_smp_wmb();
4332bfe3f2eSlogwang 	rte_smp_rmb();
4342bfe3f2eSlogwang 
4352bfe3f2eSlogwang 	vr->used->idx += i;
4362bfe3f2eSlogwang 
437*d30ea906Sjfb8856606 	rte_vhost_vring_call(dev->vid, queue_id);
4382bfe3f2eSlogwang 
4392bfe3f2eSlogwang 	return i;
4402bfe3f2eSlogwang }
441