xref: /dpdk/drivers/net/virtio/virtio_ethdev.c (revision 829c5946)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <unistd.h>
10 
11 #include <ethdev_driver.h>
12 #include <rte_memcpy.h>
13 #include <rte_string_fns.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_branch_prediction.h>
17 #include <rte_ether.h>
18 #include <rte_ip.h>
19 #include <rte_arp.h>
20 #include <rte_common.h>
21 #include <rte_errno.h>
22 #include <rte_cpuflags.h>
23 #include <rte_vect.h>
24 #include <rte_memory.h>
25 #include <rte_eal_paging.h>
26 #include <rte_eal.h>
27 #include <rte_dev.h>
28 #include <rte_cycles.h>
29 #include <rte_kvargs.h>
30 
31 #include "virtio_ethdev.h"
32 #include "virtio.h"
33 #include "virtio_logs.h"
34 #include "virtqueue.h"
35 #include "virtio_rxtx.h"
36 #include "virtio_rxtx_simple.h"
37 #include "virtio_user/virtio_user_dev.h"
38 
/*
 * Forward declarations of file-local functions that are referenced (for
 * example from the eth_dev_ops tables) before their definitions appear.
 */

/* Device life-cycle, Rx-mode, device-info and link callbacks. */
static int  virtio_dev_configure(struct rte_eth_dev *dev);
static int  virtio_dev_start(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
	uint32_t *speed,
	int *vectorized);
static int virtio_dev_info_get(struct rte_eth_dev *dev,
				struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
	int wait_to_complete);
static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);

/* Hardware (MAC) address accessors. */
static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);

/* Statistics, mbuf release, VLAN filter and MAC filter callbacks. */
static int virtio_dev_stats_get(struct rte_eth_dev *dev,
				 struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
				 struct rte_eth_xstat *xstats, unsigned n);
static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
				       struct rte_eth_xstat_name *xstats_names,
				       unsigned limit);
static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
				uint16_t vlan_id, int on);
static int virtio_mac_addr_add(struct rte_eth_dev *dev,
				struct rte_ether_addr *mac_addr,
				uint32_t index, uint32_t vmdq);
static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
static int virtio_mac_addr_set(struct rte_eth_dev *dev,
				struct rte_ether_addr *mac_addr);

/* Interrupt handling. */
static int virtio_intr_disable(struct rte_eth_dev *dev);

/* Per-queue statistics mapping. */
static int virtio_dev_queue_stats_mapping_set(
	struct rte_eth_dev *eth_dev,
	uint16_t queue_id,
	uint8_t stat_idx,
	uint8_t is_rx);

/* Link announce helpers. */
static void virtio_notify_peers(struct rte_eth_dev *dev);
static void virtio_ack_link_announce(struct rte_eth_dev *dev);
86 
87 struct rte_virtio_xstats_name_off {
88 	char name[RTE_ETH_XSTATS_NAME_SIZE];
89 	unsigned offset;
90 };
91 
92 /* [rt]x_qX_ is prepended to the name string here */
93 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
94 	{"good_packets",           offsetof(struct virtnet_rx, stats.packets)},
95 	{"good_bytes",             offsetof(struct virtnet_rx, stats.bytes)},
96 	{"errors",                 offsetof(struct virtnet_rx, stats.errors)},
97 	{"multicast_packets",      offsetof(struct virtnet_rx, stats.multicast)},
98 	{"broadcast_packets",      offsetof(struct virtnet_rx, stats.broadcast)},
99 	{"undersize_packets",      offsetof(struct virtnet_rx, stats.size_bins[0])},
100 	{"size_64_packets",        offsetof(struct virtnet_rx, stats.size_bins[1])},
101 	{"size_65_127_packets",    offsetof(struct virtnet_rx, stats.size_bins[2])},
102 	{"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
103 	{"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
104 	{"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
105 	{"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
106 	{"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
107 };
108 
109 /* [rt]x_qX_ is prepended to the name string here */
110 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
111 	{"good_packets",           offsetof(struct virtnet_tx, stats.packets)},
112 	{"good_bytes",             offsetof(struct virtnet_tx, stats.bytes)},
113 	{"multicast_packets",      offsetof(struct virtnet_tx, stats.multicast)},
114 	{"broadcast_packets",      offsetof(struct virtnet_tx, stats.broadcast)},
115 	{"undersize_packets",      offsetof(struct virtnet_tx, stats.size_bins[0])},
116 	{"size_64_packets",        offsetof(struct virtnet_tx, stats.size_bins[1])},
117 	{"size_65_127_packets",    offsetof(struct virtnet_tx, stats.size_bins[2])},
118 	{"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
119 	{"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
120 	{"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
121 	{"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
122 	{"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
123 };
124 
/* Number of extended statistics exposed per Rx queue and per Tx queue. */
#define VIRTIO_NB_RXQ_XSTATS RTE_DIM(rte_virtio_rxq_stat_strings)
#define VIRTIO_NB_TXQ_XSTATS RTE_DIM(rte_virtio_txq_stat_strings)
129 
/*
 * Table of struct virtio_hw_internal with RTE_MAX_ETHPORTS entries.
 * NOTE(review): presumably indexed by ethdev port id - confirm against the
 * accessors declared next to struct virtio_hw_internal.
 */
struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
131 
/*
 * Post one control command on a packed-ring control virtqueue and poll
 * until the device has marked it used.
 *
 * The descriptor chain consists of one descriptor for the command header,
 * pkt_num descriptors for the arguments (dlen[k] bytes each, laid out back
 * to back after the header and the status field in the control buffer) and
 * one device-writable descriptor for the status.
 *
 * cvq:     control queue; the buffer at cvq->virtio_net_hdr_mem is used
 *          as-is, this function does not copy the command into it.
 * ctrl:    only used here for sizeof(ctrl->status).
 * dlen:    array of pkt_num argument lengths, in bytes.
 * pkt_num: number of argument descriptors.
 *
 * Returns cvq->virtio_net_hdr_mz->addr, i.e. the control buffer.
 * No free-descriptor check is made here and the wait has no timeout.
 */
static struct virtio_pmd_ctrl *
virtio_send_command_packed(struct virtnet_ctl *cvq,
			   struct virtio_pmd_ctrl *ctrl,
			   int *dlen, int pkt_num)
{
	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
	int head;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct virtio_pmd_ctrl *result;
	uint16_t flags;
	int sum = 0;
	int nb_descs = 0;
	int k;

	/*
	 * Format is enforced in qemu code:
	 * One TX packet for header;
	 * At least one TX packet per argument;
	 * One RX packet for ACK.
	 */
	/*
	 * Head descriptor: address and length are filled in now, but its
	 * flags are only stored once the rest of the chain is written (see
	 * virtqueue_store_flags_packed() below). The flags value to use is
	 * sampled here, before the avail index can wrap.
	 */
	head = vq->vq_avail_idx;
	flags = vq->vq_packed.cached_flags;
	desc[head].addr = cvq->virtio_net_hdr_mem;
	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_free_cnt--;
	nb_descs++;
	/* Advance the avail index; on wrap-around toggle the avail/used bits. */
	if (++vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
	}

	/* One descriptor per argument; "sum" is the offset of argument k. */
	for (k = 0; k < pkt_num; k++) {
		desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr)
			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
		desc[vq->vq_avail_idx].len = dlen[k];
		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
			vq->vq_packed.cached_flags;
		sum += dlen[k];
		vq->vq_free_cnt--;
		nb_descs++;
		if (++vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	/* Last descriptor: device-writable status, located right after the header. */
	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
		+ sizeof(struct virtio_net_ctrl_hdr);
	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
		vq->vq_packed.cached_flags;
	vq->vq_free_cnt--;
	nb_descs++;
	if (++vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
	}

	/* Publish the head descriptor's flags last, using the value sampled above. */
	virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
			vq->hw->weak_barriers);

	virtio_wmb(vq->hw->weak_barriers);
	virtqueue_notify(vq);

	/* wait for used desc in virtqueue
	 * desc_is_used has a load-acquire or rte_io_rmb inside
	 */
	while (!desc_is_used(&desc[head], vq))
		usleep(100);

	/* now get used descriptors */
	vq->vq_free_cnt += nb_descs;
	vq->vq_used_cons_idx += nb_descs;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
			"vq->vq_avail_idx=%d\n"
			"vq->vq_used_cons_idx=%d\n"
			"vq->vq_packed.cached_flags=0x%x\n"
			"vq->vq_packed.used_wrap_counter=%d",
			vq->vq_free_cnt,
			vq->vq_avail_idx,
			vq->vq_used_cons_idx,
			vq->vq_packed.cached_flags,
			vq->vq_packed.used_wrap_counter);

	/* The caller reads the status out of the control buffer. */
	result = cvq->virtio_net_hdr_mz->addr;
	return result;
}
226 
/*
 * Post one control command on a split-ring control virtqueue and poll
 * until the device has returned at least one used entry.
 *
 * The chain is built from the free-descriptor list starting at
 * vq->vq_desc_head_idx: one descriptor for the command header, pkt_num
 * descriptors for the arguments (dlen[k] bytes each, back to back after
 * the header and the status field) and one device-writable descriptor for
 * the status.
 *
 * cvq:     control queue; the buffer at cvq->virtio_net_hdr_mem is used
 *          as-is, this function does not copy the command into it.
 * ctrl:    only used here for sizeof(ctrl->status).
 * dlen:    array of pkt_num argument lengths, in bytes.
 * pkt_num: number of argument descriptors.
 *
 * Returns cvq->virtio_net_hdr_mz->addr, i.e. the control buffer.
 * No free-descriptor check is made here and the wait has no timeout.
 */
static struct virtio_pmd_ctrl *
virtio_send_command_split(struct virtnet_ctl *cvq,
			  struct virtio_pmd_ctrl *ctrl,
			  int *dlen, int pkt_num)
{
	struct virtio_pmd_ctrl *result;
	struct virtqueue *vq = virtnet_cq_to_vq(cvq);
	uint32_t head, i;
	int k, sum = 0;

	head = vq->vq_desc_head_idx;

	/*
	 * Format is enforced in qemu code:
	 * One TX packet for header;
	 * At least one TX packet per argument;
	 * One RX packet for ACK.
	 */
	vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
	vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
	vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_free_cnt--;
	i = vq->vq_split.ring.desc[head].next;

	/* One descriptor per argument; "sum" is the offset of argument k. */
	for (k = 0; k < pkt_num; k++) {
		vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
		vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr)
			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
		vq->vq_split.ring.desc[i].len = dlen[k];
		sum += dlen[k];
		vq->vq_free_cnt--;
		i = vq->vq_split.ring.desc[i].next;
	}

	/* Last descriptor: device-writable status, located right after the header. */
	vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
	vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
			+ sizeof(struct virtio_net_ctrl_hdr);
	vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
	vq->vq_free_cnt--;

	/* The free list now starts after the descriptors just consumed. */
	vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;

	vq_update_avail_ring(vq, head);
	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);

	virtqueue_notify(vq);

	/* Poll until the device reports at least one used entry. */
	while (virtqueue_nused(vq) == 0)
		usleep(100);

	/* Reclaim every used chain and put it back at the head of the free list. */
	while (virtqueue_nused(vq)) {
		uint32_t idx, desc_idx, used_idx;
		struct vring_used_elem *uep;

		used_idx = (uint32_t)(vq->vq_used_cons_idx
				& (vq->vq_nentries - 1));
		uep = &vq->vq_split.ring.used->ring[used_idx];
		idx = (uint32_t) uep->id;
		desc_idx = idx;

		/* Walk to the last descriptor of the chain, counting each link. */
		while (vq->vq_split.ring.desc[desc_idx].flags &
				VRING_DESC_F_NEXT) {
			desc_idx = vq->vq_split.ring.desc[desc_idx].next;
			vq->vq_free_cnt++;
		}

		vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
		vq->vq_desc_head_idx = idx;

		vq->vq_used_cons_idx++;
		/* Accounts for the final descriptor of the chain. */
		vq->vq_free_cnt++;
	}

	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
			vq->vq_free_cnt, vq->vq_desc_head_idx);

	/* The caller reads the status out of the control buffer. */
	result = cvq->virtio_net_hdr_mz->addr;
	return result;
}
309 
310 static int
311 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
312 		    int *dlen, int pkt_num)
313 {
314 	virtio_net_ctrl_ack status = ~0;
315 	struct virtio_pmd_ctrl *result;
316 	struct virtqueue *vq;
317 
318 	ctrl->status = status;
319 
320 	if (!cvq) {
321 		PMD_INIT_LOG(ERR, "Control queue is not supported.");
322 		return -1;
323 	}
324 
325 	rte_spinlock_lock(&cvq->lock);
326 	vq = virtnet_cq_to_vq(cvq);
327 
328 	PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
329 		"vq->hw->cvq = %p vq = %p",
330 		vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
331 
332 	if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
333 		rte_spinlock_unlock(&cvq->lock);
334 		return -1;
335 	}
336 
337 	memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
338 		sizeof(struct virtio_pmd_ctrl));
339 
340 	if (virtio_with_packed_queue(vq->hw))
341 		result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
342 	else
343 		result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
344 
345 	rte_spinlock_unlock(&cvq->lock);
346 	return result->status;
347 }
348 
349 static int
350 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
351 {
352 	struct virtio_hw *hw = dev->data->dev_private;
353 	struct virtio_pmd_ctrl ctrl;
354 	int dlen[1];
355 	int ret;
356 
357 	ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
358 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
359 	memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
360 
361 	dlen[0] = sizeof(uint16_t);
362 
363 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
364 	if (ret) {
365 		PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
366 			  "failed, this is too late now...");
367 		return -EINVAL;
368 	}
369 
370 	return 0;
371 }
372 
/*
 * Queue release callback installed for both Rx and Tx queues.
 * Intentionally a no-op: nothing is released here.
 */
static void
virtio_dev_queue_release(void *queue)
{
	(void)queue;
}
378 
379 static uint16_t
380 virtio_get_nr_vq(struct virtio_hw *hw)
381 {
382 	uint16_t nr_vq = hw->max_queue_pairs * 2;
383 
384 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
385 		nr_vq += 1;
386 
387 	return nr_vq;
388 }
389 
390 static void
391 virtio_init_vring(struct virtqueue *vq)
392 {
393 	int size = vq->vq_nentries;
394 	uint8_t *ring_mem = vq->vq_ring_virt_mem;
395 
396 	PMD_INIT_FUNC_TRACE();
397 
398 	memset(ring_mem, 0, vq->vq_ring_size);
399 
400 	vq->vq_used_cons_idx = 0;
401 	vq->vq_desc_head_idx = 0;
402 	vq->vq_avail_idx = 0;
403 	vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
404 	vq->vq_free_cnt = vq->vq_nentries;
405 	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
406 	if (virtio_with_packed_queue(vq->hw)) {
407 		vring_init_packed(&vq->vq_packed.ring, ring_mem,
408 				  VIRTIO_VRING_ALIGN, size);
409 		vring_desc_init_packed(vq, size);
410 	} else {
411 		struct vring *vr = &vq->vq_split.ring;
412 
413 		vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size);
414 		vring_desc_init_split(vr->desc, size);
415 	}
416 	/*
417 	 * Disable device(host) interrupting guest
418 	 */
419 	virtqueue_disable_intr(vq);
420 }
421 
/*
 * Allocate and set up the virtqueue with index queue_idx.
 *
 * Steps, in order:
 *  - read the queue size from the device and validate it;
 *  - allocate the struct virtqueue (with one vq_desc_extra per entry) and
 *    store it in hw->vqs[queue_idx];
 *  - reserve (or look up, if it already exists) the memzone holding the
 *    vring and initialise the ring;
 *  - for Tx and control queues, reserve/look up a second "header" memzone;
 *  - fill in the Rx, Tx or control specific part of the virtqueue;
 *  - hand the queue to the device through the setup_queue op.
 *
 * Returns 0 on success, -EINVAL when the queue does not exist, has an
 * invalid size or setup_queue fails, -ENOMEM on allocation failure. On
 * failure after the virtqueue was allocated, everything acquired here is
 * released and hw->vqs[queue_idx] is reset to NULL.
 */
static int
virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
	char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
	const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
	unsigned int vq_size, size;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtnet_rx *rxvq = NULL;
	struct virtnet_tx *txvq = NULL;
	struct virtnet_ctl *cvq = NULL;
	struct virtqueue *vq;
	size_t sz_hdr_mz = 0;
	void *sw_ring = NULL;
	int queue_type = virtio_get_queue_type(hw, queue_idx);
	int ret;
	int numa_node = dev->device->numa_node;
	struct rte_mbuf *fake_mbuf = NULL;

	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
			queue_idx, numa_node);

	/*
	 * Read the virtqueue size from the Queue Size field
	 * Always power of 2 and if 0 virtqueue does not exist
	 */
	vq_size = VIRTIO_OPS(hw)->get_queue_num(hw, queue_idx);
	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
	if (vq_size == 0) {
		PMD_INIT_LOG(ERR, "virtqueue does not exist");
		return -EINVAL;
	}

	/* The power-of-2 requirement is only enforced for split rings. */
	if (!virtio_with_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
		PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
		return -EINVAL;
	}

	snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
		 dev->data->port_id, queue_idx);

	/* The virtqueue is followed by one vq_desc_extra per ring entry. */
	size = RTE_ALIGN_CEIL(sizeof(*vq) +
				vq_size * sizeof(struct vq_desc_extra),
				RTE_CACHE_LINE_SIZE);
	if (queue_type == VTNET_TQ) {
		/*
		 * For each xmit packet, allocate a virtio_net_hdr
		 * and indirect ring elements
		 */
		sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
	} else if (queue_type == VTNET_CQ) {
		/* Allocate a page for control vq command, data and status */
		sz_hdr_mz = rte_mem_page_size();
	}

	vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
				numa_node);
	if (vq == NULL) {
		PMD_INIT_LOG(ERR, "can not allocate vq");
		return -ENOMEM;
	}
	hw->vqs[queue_idx] = vq;

	vq->hw = hw;
	vq->vq_queue_index = queue_idx;
	vq->vq_nentries = vq_size;
	if (virtio_with_packed_queue(hw)) {
		vq->vq_packed.used_wrap_counter = 1;
		vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
		vq->vq_packed.event_flags_shadow = 0;
		/* Rx descriptors are written by the device. */
		if (queue_type == VTNET_RQ)
			vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
	}

	/*
	 * Reserve a memzone for vring elements
	 */
	size = vring_size(hw, vq_size, VIRTIO_VRING_ALIGN);
	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_VRING_ALIGN);
	PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
		     size, vq->vq_ring_size);

	mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
			numa_node, RTE_MEMZONE_IOVA_CONTIG,
			VIRTIO_VRING_ALIGN);
	if (mz == NULL) {
		/* A zone with this name already exists: reuse it. */
		if (rte_errno == EEXIST)
			mz = rte_memzone_lookup(vq_name);
		if (mz == NULL) {
			ret = -ENOMEM;
			goto free_vq;
		}
	}

	memset(mz->addr, 0, mz->len);

	/* Address given to the device: virtual address or IOVA, per hw->use_va. */
	if (hw->use_va)
		vq->vq_ring_mem = (uintptr_t)mz->addr;
	else
		vq->vq_ring_mem = mz->iova;

	vq->vq_ring_virt_mem = mz->addr;
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem);
	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: %p", vq->vq_ring_virt_mem);

	virtio_init_vring(vq);

	/* Header memzone: only needed for Tx and control queues (sz_hdr_mz != 0). */
	if (sz_hdr_mz) {
		snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
			 dev->data->port_id, queue_idx);
		hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
				numa_node, RTE_MEMZONE_IOVA_CONTIG,
				RTE_CACHE_LINE_SIZE);
		if (hdr_mz == NULL) {
			if (rte_errno == EEXIST)
				hdr_mz = rte_memzone_lookup(vq_hdr_name);
			if (hdr_mz == NULL) {
				ret = -ENOMEM;
				goto free_mz;
			}
		}
	}

	if (queue_type == VTNET_RQ) {
		/* Software ring has RTE_PMD_VIRTIO_RX_MAX_BURST extra slots. */
		size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
			       sizeof(vq->sw_ring[0]);

		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
				RTE_CACHE_LINE_SIZE, numa_node);
		if (!sw_ring) {
			PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
			ret = -ENOMEM;
			goto free_hdr_mz;
		}

		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
				RTE_CACHE_LINE_SIZE, numa_node);
		if (!fake_mbuf) {
			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
			ret = -ENOMEM;
			goto free_sw_ring;
		}

		vq->sw_ring = sw_ring;
		rxvq = &vq->rxq;
		rxvq->port_id = dev->data->port_id;
		rxvq->mz = mz;
		rxvq->fake_mbuf = fake_mbuf;
	} else if (queue_type == VTNET_TQ) {
		txvq = &vq->txq;
		txvq->port_id = dev->data->port_id;
		txvq->mz = mz;
		txvq->virtio_net_hdr_mz = hdr_mz;
		if (hw->use_va)
			txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
		else
			txvq->virtio_net_hdr_mem = hdr_mz->iova;
	} else if (queue_type == VTNET_CQ) {
		cvq = &vq->cq;
		cvq->mz = mz;
		cvq->virtio_net_hdr_mz = hdr_mz;
		if (hw->use_va)
			cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
		else
			cvq->virtio_net_hdr_mem = hdr_mz->iova;
		memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());

		/* Remember the control queue so commands can be sent on it. */
		hw->cvq = cvq;
	}

	/* Which rte_mbuf field supplies buffer addresses: VA or IOVA. */
	if (hw->use_va)
		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr);
	else
		vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);

	if (queue_type == VTNET_TQ) {
		struct virtio_tx_region *txr;
		unsigned int i;

		/* Pre-build, for every ring entry, its indirect table and header slot. */
		txr = hdr_mz->addr;
		memset(txr, 0, vq_size * sizeof(*txr));
		for (i = 0; i < vq_size; i++) {
			/* first indirect descriptor is always the tx header */
			if (!virtio_with_packed_queue(hw)) {
				struct vring_desc *start_dp = txr[i].tx_indir;
				vring_desc_init_split(start_dp,
						      RTE_DIM(txr[i].tx_indir));
				start_dp->addr = txvq->virtio_net_hdr_mem
					+ i * sizeof(*txr)
					+ offsetof(struct virtio_tx_region,
						   tx_hdr);
				start_dp->len = hw->vtnet_hdr_size;
				start_dp->flags = VRING_DESC_F_NEXT;
			} else {
				struct vring_packed_desc *start_dp =
					txr[i].tx_packed_indir;
				vring_desc_init_indirect_packed(start_dp,
				      RTE_DIM(txr[i].tx_packed_indir));
				start_dp->addr = txvq->virtio_net_hdr_mem
					+ i * sizeof(*txr)
					+ offsetof(struct virtio_tx_region,
						   tx_hdr);
				start_dp->len = hw->vtnet_hdr_size;
			}
		}
	}

	if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
		PMD_INIT_LOG(ERR, "setup_queue failed");
		ret = -EINVAL;
		goto clean_vq;
	}

	return 0;

	/*
	 * Error unwinding, in reverse order of acquisition. Pointers that
	 * were never set are still NULL when they reach the free calls.
	 */
clean_vq:
	/* Cleared for every queue type, not only the control queue. */
	hw->cvq = NULL;
	rte_free(fake_mbuf);
free_sw_ring:
	rte_free(sw_ring);
free_hdr_mz:
	rte_memzone_free(hdr_mz);
free_mz:
	rte_memzone_free(mz);
free_vq:
	rte_free(vq);
	hw->vqs[queue_idx] = NULL;

	return ret;
}
652 
653 static void
654 virtio_free_queues(struct virtio_hw *hw)
655 {
656 	uint16_t nr_vq = virtio_get_nr_vq(hw);
657 	struct virtqueue *vq;
658 	int queue_type;
659 	uint16_t i;
660 
661 	if (hw->vqs == NULL)
662 		return;
663 
664 	for (i = 0; i < nr_vq; i++) {
665 		vq = hw->vqs[i];
666 		if (!vq)
667 			continue;
668 
669 		queue_type = virtio_get_queue_type(hw, i);
670 		if (queue_type == VTNET_RQ) {
671 			rte_free(vq->rxq.fake_mbuf);
672 			rte_free(vq->sw_ring);
673 			rte_memzone_free(vq->rxq.mz);
674 		} else if (queue_type == VTNET_TQ) {
675 			rte_memzone_free(vq->txq.mz);
676 			rte_memzone_free(vq->txq.virtio_net_hdr_mz);
677 		} else {
678 			rte_memzone_free(vq->cq.mz);
679 			rte_memzone_free(vq->cq.virtio_net_hdr_mz);
680 		}
681 
682 		rte_free(vq);
683 		hw->vqs[i] = NULL;
684 	}
685 
686 	rte_free(hw->vqs);
687 	hw->vqs = NULL;
688 }
689 
690 static int
691 virtio_alloc_queues(struct rte_eth_dev *dev)
692 {
693 	struct virtio_hw *hw = dev->data->dev_private;
694 	uint16_t nr_vq = virtio_get_nr_vq(hw);
695 	uint16_t i;
696 	int ret;
697 
698 	hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
699 	if (!hw->vqs) {
700 		PMD_INIT_LOG(ERR, "failed to allocate vqs");
701 		return -ENOMEM;
702 	}
703 
704 	for (i = 0; i < nr_vq; i++) {
705 		ret = virtio_init_queue(dev, i);
706 		if (ret < 0) {
707 			virtio_free_queues(hw);
708 			return ret;
709 		}
710 	}
711 
712 	return 0;
713 }
714 
715 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
716 
717 int
718 virtio_dev_close(struct rte_eth_dev *dev)
719 {
720 	struct virtio_hw *hw = dev->data->dev_private;
721 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
722 
723 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
724 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
725 		return 0;
726 
727 	if (!hw->opened)
728 		return 0;
729 	hw->opened = 0;
730 
731 	/* reset the NIC */
732 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
733 		VIRTIO_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
734 	if (intr_conf->rxq)
735 		virtio_queues_unbind_intr(dev);
736 
737 	if (intr_conf->lsc || intr_conf->rxq) {
738 		virtio_intr_disable(dev);
739 		rte_intr_efd_disable(dev->intr_handle);
740 		rte_free(dev->intr_handle->intr_vec);
741 		dev->intr_handle->intr_vec = NULL;
742 	}
743 
744 	virtio_reset(hw);
745 	virtio_dev_free_mbufs(dev);
746 	virtio_free_queues(hw);
747 
748 	return VIRTIO_OPS(hw)->dev_close(hw);
749 }
750 
751 static int
752 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
753 {
754 	struct virtio_hw *hw = dev->data->dev_private;
755 	struct virtio_pmd_ctrl ctrl;
756 	int dlen[1];
757 	int ret;
758 
759 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
760 		PMD_INIT_LOG(INFO, "host does not support rx control");
761 		return -ENOTSUP;
762 	}
763 
764 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
765 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
766 	ctrl.data[0] = 1;
767 	dlen[0] = 1;
768 
769 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
770 	if (ret) {
771 		PMD_INIT_LOG(ERR, "Failed to enable promisc");
772 		return -EAGAIN;
773 	}
774 
775 	return 0;
776 }
777 
778 static int
779 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
780 {
781 	struct virtio_hw *hw = dev->data->dev_private;
782 	struct virtio_pmd_ctrl ctrl;
783 	int dlen[1];
784 	int ret;
785 
786 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
787 		PMD_INIT_LOG(INFO, "host does not support rx control");
788 		return -ENOTSUP;
789 	}
790 
791 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
792 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
793 	ctrl.data[0] = 0;
794 	dlen[0] = 1;
795 
796 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
797 	if (ret) {
798 		PMD_INIT_LOG(ERR, "Failed to disable promisc");
799 		return -EAGAIN;
800 	}
801 
802 	return 0;
803 }
804 
805 static int
806 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
807 {
808 	struct virtio_hw *hw = dev->data->dev_private;
809 	struct virtio_pmd_ctrl ctrl;
810 	int dlen[1];
811 	int ret;
812 
813 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
814 		PMD_INIT_LOG(INFO, "host does not support rx control");
815 		return -ENOTSUP;
816 	}
817 
818 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
819 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
820 	ctrl.data[0] = 1;
821 	dlen[0] = 1;
822 
823 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
824 	if (ret) {
825 		PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
826 		return -EAGAIN;
827 	}
828 
829 	return 0;
830 }
831 
832 static int
833 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
834 {
835 	struct virtio_hw *hw = dev->data->dev_private;
836 	struct virtio_pmd_ctrl ctrl;
837 	int dlen[1];
838 	int ret;
839 
840 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
841 		PMD_INIT_LOG(INFO, "host does not support rx control");
842 		return -ENOTSUP;
843 	}
844 
845 	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
846 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
847 	ctrl.data[0] = 0;
848 	dlen[0] = 1;
849 
850 	ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
851 	if (ret) {
852 		PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
853 		return -EAGAIN;
854 	}
855 
856 	return 0;
857 }
858 
859 uint16_t
860 virtio_rx_mem_pool_buf_size(struct rte_mempool *mp)
861 {
862 	return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
863 }
864 
/*
 * Check that a frame of max_rx_pkt_len bytes can be received with buffers
 * of rx_buf_size bytes.
 *
 * Succeeds when Rx scatter is enabled or when the frame fits in a single
 * buffer. On failure *error is set to a static description and false is
 * returned; *error is left untouched on success.
 */
bool
virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size,
			bool rx_scatter_enabled, const char **error)
{
	if (rx_scatter_enabled || max_rx_pkt_len <= rx_buf_size)
		return true;

	*error = "Rx scatter is disabled and RxQ mbuf pool object size is too small";
	return false;
}
876 
877 static bool
878 virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev,
879 				      uint16_t frame_size)
880 {
881 	struct virtio_hw *hw = dev->data->dev_private;
882 	struct virtnet_rx *rxvq;
883 	struct virtqueue *vq;
884 	unsigned int qidx;
885 	uint16_t buf_size;
886 	const char *error;
887 
888 	if (hw->vqs == NULL)
889 		return true;
890 
891 	for (qidx = 0; (vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX]) != NULL;
892 	     qidx++) {
893 		rxvq = &vq->rxq;
894 		if (rxvq->mpool == NULL)
895 			continue;
896 		buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool);
897 
898 		if (!virtio_rx_check_scatter(frame_size, buf_size,
899 					     hw->rx_ol_scatter, &error)) {
900 			PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s",
901 				     qidx, error);
902 			return false;
903 		}
904 	}
905 
906 	return true;
907 }
908 
909 #define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
910 static int
911 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
912 {
913 	struct virtio_hw *hw = dev->data->dev_private;
914 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
915 				 hw->vtnet_hdr_size;
916 	uint32_t frame_size = mtu + ether_hdr_len;
917 	uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
918 
919 	max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
920 
921 	if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
922 		PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
923 			RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
924 		return -EINVAL;
925 	}
926 
927 	if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) {
928 		PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed");
929 		return -EINVAL;
930 	}
931 
932 	hw->max_rx_pkt_len = frame_size;
933 	dev->data->dev_conf.rxmode.max_rx_pkt_len = hw->max_rx_pkt_len;
934 
935 	return 0;
936 }
937 
938 static int
939 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
940 {
941 	struct virtio_hw *hw = dev->data->dev_private;
942 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
943 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
944 
945 	virtqueue_enable_intr(vq);
946 	virtio_mb(hw->weak_barriers);
947 	return 0;
948 }
949 
950 static int
951 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
952 {
953 	struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
954 	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
955 
956 	virtqueue_disable_intr(vq);
957 	return 0;
958 }
959 
960 /*
961  * dev_ops for virtio, bare necessities for basic operation
962  */
963 static const struct eth_dev_ops virtio_eth_dev_ops = {
964 	.dev_configure           = virtio_dev_configure,
965 	.dev_start               = virtio_dev_start,
966 	.dev_stop                = virtio_dev_stop,
967 	.dev_close               = virtio_dev_close,
968 	.promiscuous_enable      = virtio_dev_promiscuous_enable,
969 	.promiscuous_disable     = virtio_dev_promiscuous_disable,
970 	.allmulticast_enable     = virtio_dev_allmulticast_enable,
971 	.allmulticast_disable    = virtio_dev_allmulticast_disable,
972 	.mtu_set                 = virtio_mtu_set,
973 	.dev_infos_get           = virtio_dev_info_get,
974 	.stats_get               = virtio_dev_stats_get,
975 	.xstats_get              = virtio_dev_xstats_get,
976 	.xstats_get_names        = virtio_dev_xstats_get_names,
977 	.stats_reset             = virtio_dev_stats_reset,
978 	.xstats_reset            = virtio_dev_stats_reset,
979 	.link_update             = virtio_dev_link_update,
980 	.vlan_offload_set        = virtio_dev_vlan_offload_set,
981 	.rx_queue_setup          = virtio_dev_rx_queue_setup,
982 	.rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
983 	.rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
984 	.rx_queue_release        = virtio_dev_queue_release,
985 	.tx_queue_setup          = virtio_dev_tx_queue_setup,
986 	.tx_queue_release        = virtio_dev_queue_release,
987 	/* collect stats per queue */
988 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
989 	.vlan_filter_set         = virtio_vlan_filter_set,
990 	.mac_addr_add            = virtio_mac_addr_add,
991 	.mac_addr_remove         = virtio_mac_addr_remove,
992 	.mac_addr_set            = virtio_mac_addr_set,
993 };
994 
995 /*
996  * dev_ops for virtio-user in secondary processes, as we just have
997  * some limited supports currently.
998  */
999 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
1000 	.dev_infos_get           = virtio_dev_info_get,
1001 	.stats_get               = virtio_dev_stats_get,
1002 	.xstats_get              = virtio_dev_xstats_get,
1003 	.xstats_get_names        = virtio_dev_xstats_get_names,
1004 	.stats_reset             = virtio_dev_stats_reset,
1005 	.xstats_reset            = virtio_dev_stats_reset,
1006 	/* collect stats per queue */
1007 	.queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
1008 };
1009 
1010 static void
1011 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1012 {
1013 	unsigned i;
1014 
1015 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1016 		const struct virtnet_tx *txvq = dev->data->tx_queues[i];
1017 		if (txvq == NULL)
1018 			continue;
1019 
1020 		stats->opackets += txvq->stats.packets;
1021 		stats->obytes += txvq->stats.bytes;
1022 
1023 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1024 			stats->q_opackets[i] = txvq->stats.packets;
1025 			stats->q_obytes[i] = txvq->stats.bytes;
1026 		}
1027 	}
1028 
1029 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1030 		const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1031 		if (rxvq == NULL)
1032 			continue;
1033 
1034 		stats->ipackets += rxvq->stats.packets;
1035 		stats->ibytes += rxvq->stats.bytes;
1036 		stats->ierrors += rxvq->stats.errors;
1037 
1038 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
1039 			stats->q_ipackets[i] = rxvq->stats.packets;
1040 			stats->q_ibytes[i] = rxvq->stats.bytes;
1041 		}
1042 	}
1043 
1044 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1045 }
1046 
1047 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1048 				       struct rte_eth_xstat_name *xstats_names,
1049 				       __rte_unused unsigned limit)
1050 {
1051 	unsigned i;
1052 	unsigned count = 0;
1053 	unsigned t;
1054 
1055 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1056 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1057 
1058 	if (xstats_names != NULL) {
1059 		/* Note: limit checked in rte_eth_xstats_names() */
1060 
1061 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1062 			struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1063 			if (rxvq == NULL)
1064 				continue;
1065 			for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1066 				snprintf(xstats_names[count].name,
1067 					sizeof(xstats_names[count].name),
1068 					"rx_q%u_%s", i,
1069 					rte_virtio_rxq_stat_strings[t].name);
1070 				count++;
1071 			}
1072 		}
1073 
1074 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1075 			struct virtnet_tx *txvq = dev->data->tx_queues[i];
1076 			if (txvq == NULL)
1077 				continue;
1078 			for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1079 				snprintf(xstats_names[count].name,
1080 					sizeof(xstats_names[count].name),
1081 					"tx_q%u_%s", i,
1082 					rte_virtio_txq_stat_strings[t].name);
1083 				count++;
1084 			}
1085 		}
1086 		return count;
1087 	}
1088 	return nstats;
1089 }
1090 
1091 static int
1092 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1093 		      unsigned n)
1094 {
1095 	unsigned i;
1096 	unsigned count = 0;
1097 
1098 	unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1099 		dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1100 
1101 	if (n < nstats)
1102 		return nstats;
1103 
1104 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1105 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1106 
1107 		if (rxvq == NULL)
1108 			continue;
1109 
1110 		unsigned t;
1111 
1112 		for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1113 			xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1114 				rte_virtio_rxq_stat_strings[t].offset);
1115 			xstats[count].id = count;
1116 			count++;
1117 		}
1118 	}
1119 
1120 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1121 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1122 
1123 		if (txvq == NULL)
1124 			continue;
1125 
1126 		unsigned t;
1127 
1128 		for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1129 			xstats[count].value = *(uint64_t *)(((char *)txvq) +
1130 				rte_virtio_txq_stat_strings[t].offset);
1131 			xstats[count].id = count;
1132 			count++;
1133 		}
1134 	}
1135 
1136 	return count;
1137 }
1138 
/*
 * stats_get callback: fill *stats from the per-queue software counters
 * via virtio_update_stats(). Always returns 0.
 */
static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	/* All the work is done by the shared helper; it cannot fail. */
	virtio_update_stats(dev, stats);
	return 0;
}
1146 
1147 static int
1148 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1149 {
1150 	unsigned int i;
1151 
1152 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
1153 		struct virtnet_tx *txvq = dev->data->tx_queues[i];
1154 		if (txvq == NULL)
1155 			continue;
1156 
1157 		txvq->stats.packets = 0;
1158 		txvq->stats.bytes = 0;
1159 		txvq->stats.multicast = 0;
1160 		txvq->stats.broadcast = 0;
1161 		memset(txvq->stats.size_bins, 0,
1162 		       sizeof(txvq->stats.size_bins[0]) * 8);
1163 	}
1164 
1165 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
1166 		struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1167 		if (rxvq == NULL)
1168 			continue;
1169 
1170 		rxvq->stats.packets = 0;
1171 		rxvq->stats.bytes = 0;
1172 		rxvq->stats.errors = 0;
1173 		rxvq->stats.multicast = 0;
1174 		rxvq->stats.broadcast = 0;
1175 		memset(rxvq->stats.size_bins, 0,
1176 		       sizeof(rxvq->stats.size_bins[0]) * 8);
1177 	}
1178 
1179 	return 0;
1180 }
1181 
1182 static void
1183 virtio_set_hwaddr(struct virtio_hw *hw)
1184 {
1185 	virtio_write_dev_config(hw,
1186 			offsetof(struct virtio_net_config, mac),
1187 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1188 }
1189 
1190 static void
1191 virtio_get_hwaddr(struct virtio_hw *hw)
1192 {
1193 	if (virtio_with_feature(hw, VIRTIO_NET_F_MAC)) {
1194 		virtio_read_dev_config(hw,
1195 			offsetof(struct virtio_net_config, mac),
1196 			&hw->mac_addr, RTE_ETHER_ADDR_LEN);
1197 	} else {
1198 		rte_eth_random_addr(&hw->mac_addr[0]);
1199 		virtio_set_hwaddr(hw);
1200 	}
1201 }
1202 
1203 static int
1204 virtio_mac_table_set(struct virtio_hw *hw,
1205 		     const struct virtio_net_ctrl_mac *uc,
1206 		     const struct virtio_net_ctrl_mac *mc)
1207 {
1208 	struct virtio_pmd_ctrl ctrl;
1209 	int err, len[2];
1210 
1211 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1212 		PMD_DRV_LOG(INFO, "host does not support mac table");
1213 		return -1;
1214 	}
1215 
1216 	ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1217 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1218 
1219 	len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1220 	memcpy(ctrl.data, uc, len[0]);
1221 
1222 	len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1223 	memcpy(ctrl.data + len[0], mc, len[1]);
1224 
1225 	err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1226 	if (err != 0)
1227 		PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
1228 	return err;
1229 }
1230 
1231 static int
1232 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1233 		    uint32_t index, uint32_t vmdq __rte_unused)
1234 {
1235 	struct virtio_hw *hw = dev->data->dev_private;
1236 	const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1237 	unsigned int i;
1238 	struct virtio_net_ctrl_mac *uc, *mc;
1239 
1240 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1241 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1242 		return -EINVAL;
1243 	}
1244 
1245 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1246 		sizeof(uc->entries));
1247 	uc->entries = 0;
1248 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1249 		sizeof(mc->entries));
1250 	mc->entries = 0;
1251 
1252 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1253 		const struct rte_ether_addr *addr
1254 			= (i == index) ? mac_addr : addrs + i;
1255 		struct virtio_net_ctrl_mac *tbl
1256 			= rte_is_multicast_ether_addr(addr) ? mc : uc;
1257 
1258 		memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1259 	}
1260 
1261 	return virtio_mac_table_set(hw, uc, mc);
1262 }
1263 
1264 static void
1265 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1266 {
1267 	struct virtio_hw *hw = dev->data->dev_private;
1268 	struct rte_ether_addr *addrs = dev->data->mac_addrs;
1269 	struct virtio_net_ctrl_mac *uc, *mc;
1270 	unsigned int i;
1271 
1272 	if (index >= VIRTIO_MAX_MAC_ADDRS) {
1273 		PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1274 		return;
1275 	}
1276 
1277 	uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1278 		sizeof(uc->entries));
1279 	uc->entries = 0;
1280 	mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1281 		sizeof(mc->entries));
1282 	mc->entries = 0;
1283 
1284 	for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1285 		struct virtio_net_ctrl_mac *tbl;
1286 
1287 		if (i == index || rte_is_zero_ether_addr(addrs + i))
1288 			continue;
1289 
1290 		tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1291 		memcpy(&tbl->macs[tbl->entries++], addrs + i,
1292 			RTE_ETHER_ADDR_LEN);
1293 	}
1294 
1295 	virtio_mac_table_set(hw, uc, mc);
1296 }
1297 
1298 static int
1299 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1300 {
1301 	struct virtio_hw *hw = dev->data->dev_private;
1302 
1303 	memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1304 
1305 	/* Use atomic update if available */
1306 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1307 		struct virtio_pmd_ctrl ctrl;
1308 		int len = RTE_ETHER_ADDR_LEN;
1309 
1310 		ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1311 		ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1312 
1313 		memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1314 		return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1315 	}
1316 
1317 	if (!virtio_with_feature(hw, VIRTIO_NET_F_MAC))
1318 		return -ENOTSUP;
1319 
1320 	virtio_set_hwaddr(hw);
1321 	return 0;
1322 }
1323 
1324 static int
1325 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1326 {
1327 	struct virtio_hw *hw = dev->data->dev_private;
1328 	struct virtio_pmd_ctrl ctrl;
1329 	int len;
1330 
1331 	if (!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1332 		return -ENOTSUP;
1333 
1334 	ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1335 	ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1336 	memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1337 	len = sizeof(vlan_id);
1338 
1339 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1340 }
1341 
1342 static int
1343 virtio_intr_unmask(struct rte_eth_dev *dev)
1344 {
1345 	struct virtio_hw *hw = dev->data->dev_private;
1346 
1347 	if (rte_intr_ack(dev->intr_handle) < 0)
1348 		return -1;
1349 
1350 	if (VIRTIO_OPS(hw)->intr_detect)
1351 		VIRTIO_OPS(hw)->intr_detect(hw);
1352 
1353 	return 0;
1354 }
1355 
1356 static int
1357 virtio_intr_enable(struct rte_eth_dev *dev)
1358 {
1359 	struct virtio_hw *hw = dev->data->dev_private;
1360 
1361 	if (rte_intr_enable(dev->intr_handle) < 0)
1362 		return -1;
1363 
1364 	if (VIRTIO_OPS(hw)->intr_detect)
1365 		VIRTIO_OPS(hw)->intr_detect(hw);
1366 
1367 	return 0;
1368 }
1369 
1370 static int
1371 virtio_intr_disable(struct rte_eth_dev *dev)
1372 {
1373 	struct virtio_hw *hw = dev->data->dev_private;
1374 
1375 	if (rte_intr_disable(dev->intr_handle) < 0)
1376 		return -1;
1377 
1378 	if (VIRTIO_OPS(hw)->intr_detect)
1379 		VIRTIO_OPS(hw)->intr_detect(hw);
1380 
1381 	return 0;
1382 }
1383 
1384 static int
1385 virtio_ethdev_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1386 {
1387 	uint64_t host_features;
1388 
1389 	/* Prepare guest_features: feature that driver wants to support */
1390 	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1391 		req_features);
1392 
1393 	/* Read device(host) feature bits */
1394 	host_features = VIRTIO_OPS(hw)->get_features(hw);
1395 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1396 		host_features);
1397 
1398 	/* If supported, ensure MTU value is valid before acknowledging it. */
1399 	if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1400 		struct virtio_net_config config;
1401 
1402 		virtio_read_dev_config(hw,
1403 			offsetof(struct virtio_net_config, mtu),
1404 			&config.mtu, sizeof(config.mtu));
1405 
1406 		if (config.mtu < RTE_ETHER_MIN_MTU)
1407 			req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1408 	}
1409 
1410 	/*
1411 	 * Negotiate features: Subset of device feature bits are written back
1412 	 * guest feature bits.
1413 	 */
1414 	hw->guest_features = req_features;
1415 	hw->guest_features = virtio_negotiate_features(hw, host_features);
1416 	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1417 		hw->guest_features);
1418 
1419 	if (VIRTIO_OPS(hw)->features_ok(hw) < 0)
1420 		return -1;
1421 
1422 	if (virtio_with_feature(hw, VIRTIO_F_VERSION_1)) {
1423 		virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1424 
1425 		if (!(virtio_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1426 			PMD_INIT_LOG(ERR, "Failed to set FEATURES_OK status!");
1427 			return -1;
1428 		}
1429 	}
1430 
1431 	hw->req_guest_features = req_features;
1432 
1433 	return 0;
1434 }
1435 
1436 int
1437 virtio_dev_pause(struct rte_eth_dev *dev)
1438 {
1439 	struct virtio_hw *hw = dev->data->dev_private;
1440 
1441 	rte_spinlock_lock(&hw->state_lock);
1442 
1443 	if (hw->started == 0) {
1444 		/* Device is just stopped. */
1445 		rte_spinlock_unlock(&hw->state_lock);
1446 		return -1;
1447 	}
1448 	hw->started = 0;
1449 	/*
1450 	 * Prevent the worker threads from touching queues to avoid contention,
1451 	 * 1 ms should be enough for the ongoing Tx function to finish.
1452 	 */
1453 	rte_delay_ms(1);
1454 	return 0;
1455 }
1456 
1457 /*
1458  * Recover hw state to let the worker threads continue.
1459  */
1460 void
1461 virtio_dev_resume(struct rte_eth_dev *dev)
1462 {
1463 	struct virtio_hw *hw = dev->data->dev_private;
1464 
1465 	hw->started = 1;
1466 	rte_spinlock_unlock(&hw->state_lock);
1467 }
1468 
1469 /*
1470  * Should be called only after device is paused.
1471  */
1472 int
1473 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1474 		int nb_pkts)
1475 {
1476 	struct virtio_hw *hw = dev->data->dev_private;
1477 	struct virtnet_tx *txvq = dev->data->tx_queues[0];
1478 	int ret;
1479 
1480 	hw->inject_pkts = tx_pkts;
1481 	ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1482 	hw->inject_pkts = NULL;
1483 
1484 	return ret;
1485 }
1486 
1487 static void
1488 virtio_notify_peers(struct rte_eth_dev *dev)
1489 {
1490 	struct virtio_hw *hw = dev->data->dev_private;
1491 	struct virtnet_rx *rxvq;
1492 	struct rte_mbuf *rarp_mbuf;
1493 
1494 	if (!dev->data->rx_queues)
1495 		return;
1496 
1497 	rxvq = dev->data->rx_queues[0];
1498 	if (!rxvq)
1499 		return;
1500 
1501 	rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1502 			(struct rte_ether_addr *)hw->mac_addr);
1503 	if (rarp_mbuf == NULL) {
1504 		PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1505 		return;
1506 	}
1507 
1508 	/* If virtio port just stopped, no need to send RARP */
1509 	if (virtio_dev_pause(dev) < 0) {
1510 		rte_pktmbuf_free(rarp_mbuf);
1511 		return;
1512 	}
1513 
1514 	virtio_inject_pkts(dev, &rarp_mbuf, 1);
1515 	virtio_dev_resume(dev);
1516 }
1517 
1518 static void
1519 virtio_ack_link_announce(struct rte_eth_dev *dev)
1520 {
1521 	struct virtio_hw *hw = dev->data->dev_private;
1522 	struct virtio_pmd_ctrl ctrl;
1523 
1524 	ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1525 	ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1526 
1527 	virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1528 }
1529 
1530 /*
1531  * Process virtio config changed interrupt. Call the callback
1532  * if link state changed, generate gratuitous RARP packet if
1533  * the status indicates an ANNOUNCE.
1534  */
1535 void
1536 virtio_interrupt_handler(void *param)
1537 {
1538 	struct rte_eth_dev *dev = param;
1539 	struct virtio_hw *hw = dev->data->dev_private;
1540 	uint8_t isr;
1541 	uint16_t status;
1542 
1543 	/* Read interrupt status which clears interrupt */
1544 	isr = virtio_get_isr(hw);
1545 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
1546 
1547 	if (virtio_intr_unmask(dev) < 0)
1548 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1549 
1550 	if (isr & VIRTIO_ISR_CONFIG) {
1551 		if (virtio_dev_link_update(dev, 0) == 0)
1552 			rte_eth_dev_callback_process(dev,
1553 						     RTE_ETH_EVENT_INTR_LSC,
1554 						     NULL);
1555 
1556 		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1557 			virtio_read_dev_config(hw,
1558 				offsetof(struct virtio_net_config, status),
1559 				&status, sizeof(status));
1560 			if (status & VIRTIO_NET_S_ANNOUNCE) {
1561 				virtio_notify_peers(dev);
1562 				if (hw->cvq)
1563 					virtio_ack_link_announce(dev);
1564 			}
1565 		}
1566 	}
1567 }
1568 
1569 /* set rx and tx handlers according to what is supported */
1570 static void
1571 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1572 {
1573 	struct virtio_hw *hw = eth_dev->data->dev_private;
1574 
1575 	eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
1576 	if (virtio_with_packed_queue(hw)) {
1577 		PMD_INIT_LOG(INFO,
1578 			"virtio: using packed ring %s Tx path on port %u",
1579 			hw->use_vec_tx ? "vectorized" : "standard",
1580 			eth_dev->data->port_id);
1581 		if (hw->use_vec_tx)
1582 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1583 		else
1584 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1585 	} else {
1586 		if (hw->use_inorder_tx) {
1587 			PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1588 				eth_dev->data->port_id);
1589 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1590 		} else {
1591 			PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1592 				eth_dev->data->port_id);
1593 			eth_dev->tx_pkt_burst = virtio_xmit_pkts;
1594 		}
1595 	}
1596 
1597 	if (virtio_with_packed_queue(hw)) {
1598 		if (hw->use_vec_rx) {
1599 			PMD_INIT_LOG(INFO,
1600 				"virtio: using packed ring vectorized Rx path on port %u",
1601 				eth_dev->data->port_id);
1602 			eth_dev->rx_pkt_burst =
1603 				&virtio_recv_pkts_packed_vec;
1604 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1605 			PMD_INIT_LOG(INFO,
1606 				"virtio: using packed ring mergeable buffer Rx path on port %u",
1607 				eth_dev->data->port_id);
1608 			eth_dev->rx_pkt_burst =
1609 				&virtio_recv_mergeable_pkts_packed;
1610 		} else {
1611 			PMD_INIT_LOG(INFO,
1612 				"virtio: using packed ring standard Rx path on port %u",
1613 				eth_dev->data->port_id);
1614 			eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1615 		}
1616 	} else {
1617 		if (hw->use_vec_rx) {
1618 			PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1619 				eth_dev->data->port_id);
1620 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1621 		} else if (hw->use_inorder_rx) {
1622 			PMD_INIT_LOG(INFO,
1623 				"virtio: using inorder Rx path on port %u",
1624 				eth_dev->data->port_id);
1625 			eth_dev->rx_pkt_burst =	&virtio_recv_pkts_inorder;
1626 		} else if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1627 			PMD_INIT_LOG(INFO,
1628 				"virtio: using mergeable buffer Rx path on port %u",
1629 				eth_dev->data->port_id);
1630 			eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1631 		} else {
1632 			PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1633 				eth_dev->data->port_id);
1634 			eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1635 		}
1636 	}
1637 
1638 }
1639 
1640 /* Only support 1:1 queue/interrupt mapping so far.
1641  * TODO: support n:1 queue/interrupt mapping when there are limited number of
1642  * interrupt vectors (<N+1).
1643  */
1644 static int
1645 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1646 {
1647 	uint32_t i;
1648 	struct virtio_hw *hw = dev->data->dev_private;
1649 
1650 	PMD_INIT_LOG(INFO, "queue/interrupt binding");
1651 	for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1652 		dev->intr_handle->intr_vec[i] = i + 1;
1653 		if (VIRTIO_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1654 						 VIRTIO_MSI_NO_VECTOR) {
1655 			PMD_DRV_LOG(ERR, "failed to set queue vector");
1656 			return -EBUSY;
1657 		}
1658 	}
1659 
1660 	return 0;
1661 }
1662 
1663 static void
1664 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1665 {
1666 	uint32_t i;
1667 	struct virtio_hw *hw = dev->data->dev_private;
1668 
1669 	PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1670 	for (i = 0; i < dev->data->nb_rx_queues; ++i)
1671 		VIRTIO_OPS(hw)->set_queue_irq(hw,
1672 					     hw->vqs[i * VTNET_CQ],
1673 					     VIRTIO_MSI_NO_VECTOR);
1674 }
1675 
1676 static int
1677 virtio_configure_intr(struct rte_eth_dev *dev)
1678 {
1679 	struct virtio_hw *hw = dev->data->dev_private;
1680 
1681 	if (!rte_intr_cap_multiple(dev->intr_handle)) {
1682 		PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1683 		return -ENOTSUP;
1684 	}
1685 
1686 	if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1687 		PMD_INIT_LOG(ERR, "Fail to create eventfd");
1688 		return -1;
1689 	}
1690 
1691 	if (!dev->intr_handle->intr_vec) {
1692 		dev->intr_handle->intr_vec =
1693 			rte_zmalloc("intr_vec",
1694 				    hw->max_queue_pairs * sizeof(int), 0);
1695 		if (!dev->intr_handle->intr_vec) {
1696 			PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1697 				     hw->max_queue_pairs);
1698 			return -ENOMEM;
1699 		}
1700 	}
1701 
1702 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1703 		/* Re-register callback to update max_intr */
1704 		rte_intr_callback_unregister(dev->intr_handle,
1705 					     virtio_interrupt_handler,
1706 					     dev);
1707 		rte_intr_callback_register(dev->intr_handle,
1708 					   virtio_interrupt_handler,
1709 					   dev);
1710 	}
1711 
1712 	/* DO NOT try to remove this! This function will enable msix, or QEMU
1713 	 * will encounter SIGSEGV when DRIVER_OK is sent.
1714 	 * And for legacy devices, this should be done before queue/vec binding
1715 	 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1716 	 * (22) will be ignored.
1717 	 */
1718 	if (virtio_intr_enable(dev) < 0) {
1719 		PMD_DRV_LOG(ERR, "interrupt enable failed");
1720 		return -1;
1721 	}
1722 
1723 	if (virtio_queues_bind_intr(dev) < 0) {
1724 		PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1725 		return -1;
1726 	}
1727 
1728 	return 0;
1729 }
/* Placeholder stored in hw->duplex until a real duplex value is known. */
#define DUPLEX_UNKNOWN   0xff
/*
 * Reset the device and (re)negotiate features, then rebuild the driver
 * state that depends on the negotiated feature set.
 *
 * Sequence: reset the device; release previously allocated queues, if any;
 * set the ACK and DRIVER status bits; negotiate req_features; derive the
 * barrier mode, LSC flag, virtio-net header size, MAC address, link
 * speed/duplex, maximum queue pairs and maximum MTU; allocate the queues;
 * configure Rx queue interrupts when requested in dev_conf; finally call
 * virtio_reinit_complete().
 *
 * Returns 0 on success and a negative value on failure.
 */
static int
virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
{
	struct virtio_hw *hw = eth_dev->data->dev_private;
	struct virtio_net_config *config;
	/* Scratch copy of the config fields read from the device below. */
	struct virtio_net_config local_config;
	int ret;

	/* Reset the device although not necessary at startup */
	virtio_reset(hw);

	/* Queues left over from a previous initialization are released. */
	if (hw->vqs) {
		virtio_dev_free_mbufs(eth_dev);
		virtio_free_queues(hw);
	}

	/* Tell the host we've noticed this device. */
	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);

	/* Tell the host we've known how to drive the device. */
	virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
	if (virtio_ethdev_negotiate_features(hw, req_features) < 0)
		return -1;

	/* Weak barriers are used unless VIRTIO_F_ORDER_PLATFORM was negotiated. */
	hw->weak_barriers = !virtio_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);

	/* If host does not support both status and MSI-X then disable LSC */
	if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->intr_lsc)
		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
	else
		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;

	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	/* Setting up rx_header size for the device */
	if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
	    virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
	    virtio_with_packed_queue(hw))
		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	/* Copy the permanent MAC address to: virtio_hw */
	virtio_get_hwaddr(hw);
	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
			&eth_dev->data->mac_addrs[0]);
	PMD_INIT_LOG(DEBUG,
		     "PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);

	/*
	 * A speed already set in hw->speed is kept; only an unknown speed is
	 * filled in from the device, and only with VIRTIO_NET_F_SPEED_DUPLEX.
	 */
	if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
		if (virtio_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
			config = &local_config;
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, speed),
				&config->speed, sizeof(config->speed));
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, duplex),
				&config->duplex, sizeof(config->duplex));
			hw->speed = config->speed;
			hw->duplex = config->duplex;
		}
	}
	/* Default to full duplex when nothing better is known. */
	if (hw->duplex == DUPLEX_UNKNOWN)
		hw->duplex = ETH_LINK_FULL_DUPLEX;
	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
		hw->speed, hw->duplex);
	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
		config = &local_config;

		/* The MAC read here is only used for the debug log below. */
		virtio_read_dev_config(hw,
			offsetof(struct virtio_net_config, mac),
			&config->mac, sizeof(config->mac));

		if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, status),
				&config->status, sizeof(config->status));
		} else {
			PMD_INIT_LOG(DEBUG,
				     "VIRTIO_NET_F_STATUS is not supported");
			config->status = 0;
		}

		/* Without VIRTIO_NET_F_MQ a single queue pair is assumed. */
		if (virtio_with_feature(hw, VIRTIO_NET_F_MQ)) {
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, max_virtqueue_pairs),
				&config->max_virtqueue_pairs,
				sizeof(config->max_virtqueue_pairs));
		} else {
			PMD_INIT_LOG(DEBUG,
				     "VIRTIO_NET_F_MQ is not supported");
			config->max_virtqueue_pairs = 1;
		}

		hw->max_queue_pairs = config->max_virtqueue_pairs;

		if (virtio_with_feature(hw, VIRTIO_NET_F_MTU)) {
			virtio_read_dev_config(hw,
				offsetof(struct virtio_net_config, mtu),
				&config->mtu,
				sizeof(config->mtu));

			/*
			 * MTU value has already been checked at negotiation
			 * time, but check again in case it has changed since
			 * then, which should not happen.
			 */
			if (config->mtu < RTE_ETHER_MIN_MTU) {
				PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
						config->mtu);
				return -1;
			}

			hw->max_mtu = config->mtu;
			/* Set initial MTU to maximum one supported by vhost */
			eth_dev->data->mtu = config->mtu;

		} else {
			/*
			 * No device-provided MTU: derive the maximum from the
			 * largest Rx packet length minus the Ethernet header,
			 * VLAN tag and virtio-net header.
			 */
			hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
				VLAN_TAG_LEN - hw->vtnet_hdr_size;
		}

		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
				config->max_virtqueue_pairs);
		PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
		PMD_INIT_LOG(DEBUG,
				"PORT MAC: " RTE_ETHER_ADDR_PRT_FMT,
				config->mac[0], config->mac[1],
				config->mac[2], config->mac[3],
				config->mac[4], config->mac[5]);
	} else {
		/* No control queue: one queue pair and the computed max MTU. */
		PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
		hw->max_queue_pairs = 1;
		hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
			VLAN_TAG_LEN - hw->vtnet_hdr_size;
	}

	ret = virtio_alloc_queues(eth_dev);
	if (ret < 0)
		return ret;

	/* Rx queue interrupts are only set up when requested in dev_conf. */
	if (eth_dev->data->dev_conf.intr_conf.rxq) {
		if (virtio_configure_intr(eth_dev) < 0) {
			PMD_INIT_LOG(ERR, "failed to configure interrupt");
			/* Undo the queue allocation done just above. */
			virtio_free_queues(hw);
			return -1;
		}
	}

	virtio_reinit_complete(hw);

	return 0;
}
1887 
1888 /*
1889  * This function is based on probe() function in virtio_pci.c
1890  * It returns 0 on success.
1891  */
1892 int
1893 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1894 {
1895 	struct virtio_hw *hw = eth_dev->data->dev_private;
1896 	uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
1897 	int vectorized = 0;
1898 	int ret;
1899 
1900 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1901 		PMD_INIT_LOG(ERR,
1902 			"Not sufficient headroom required = %d, avail = %d",
1903 			(int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1904 			RTE_PKTMBUF_HEADROOM);
1905 
1906 		return -1;
1907 	}
1908 
1909 	eth_dev->dev_ops = &virtio_eth_dev_ops;
1910 	eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
1911 
1912 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1913 		set_rxtx_funcs(eth_dev);
1914 		return 0;
1915 	}
1916 
1917 	ret = virtio_dev_devargs_parse(eth_dev->device->devargs, &speed, &vectorized);
1918 	if (ret < 0)
1919 		return ret;
1920 	hw->speed = speed;
1921 	hw->duplex = DUPLEX_UNKNOWN;
1922 
1923 	/* Allocate memory for storing MAC addresses */
1924 	eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1925 				VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1926 	if (eth_dev->data->mac_addrs == NULL) {
1927 		PMD_INIT_LOG(ERR,
1928 			"Failed to allocate %d bytes needed to store MAC addresses",
1929 			VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1930 		return -ENOMEM;
1931 	}
1932 
1933 	rte_spinlock_init(&hw->state_lock);
1934 
1935 	/* reset device and negotiate default features */
1936 	ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1937 	if (ret < 0)
1938 		goto err_virtio_init;
1939 
1940 	if (vectorized) {
1941 		if (!virtio_with_packed_queue(hw)) {
1942 			hw->use_vec_rx = 1;
1943 		} else {
1944 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1945 			hw->use_vec_rx = 1;
1946 			hw->use_vec_tx = 1;
1947 #else
1948 			PMD_DRV_LOG(INFO,
1949 				"building environment do not support packed ring vectorized");
1950 #endif
1951 		}
1952 	}
1953 
1954 	hw->opened = 1;
1955 
1956 	return 0;
1957 
1958 err_virtio_init:
1959 	rte_free(eth_dev->data->mac_addrs);
1960 	eth_dev->data->mac_addrs = NULL;
1961 	return ret;
1962 }
1963 
1964 static uint32_t
1965 virtio_dev_speed_capa_get(uint32_t speed)
1966 {
1967 	switch (speed) {
1968 	case ETH_SPEED_NUM_10G:
1969 		return ETH_LINK_SPEED_10G;
1970 	case ETH_SPEED_NUM_20G:
1971 		return ETH_LINK_SPEED_20G;
1972 	case ETH_SPEED_NUM_25G:
1973 		return ETH_LINK_SPEED_25G;
1974 	case ETH_SPEED_NUM_40G:
1975 		return ETH_LINK_SPEED_40G;
1976 	case ETH_SPEED_NUM_50G:
1977 		return ETH_LINK_SPEED_50G;
1978 	case ETH_SPEED_NUM_56G:
1979 		return ETH_LINK_SPEED_56G;
1980 	case ETH_SPEED_NUM_100G:
1981 		return ETH_LINK_SPEED_100G;
1982 	case ETH_SPEED_NUM_200G:
1983 		return ETH_LINK_SPEED_200G;
1984 	default:
1985 		return 0;
1986 	}
1987 }
1988 
/*
 * kvargs handler for the "vectorized" devarg.
 *
 * Stores 1 in the int pointed to by ret_val when the value is exactly "1",
 * and 0 for any other string.
 *
 * @param key      Devarg key; not used.
 * @param value    Devarg value string.
 * @param ret_val  Points to the int that receives the parsed flag.
 * @return 0 on success, -EINVAL when value or ret_val is NULL (same guard
 *         as link_speed_handler()); *ret_val is left untouched in that case.
 */
static int vectorized_check_handler(const char *key,
		const char *value, void *ret_val)
{
	int *enabled = ret_val;

	(void)key;

	/* strcmp() on a NULL value would be undefined behaviour. */
	if (value == NULL || ret_val == NULL)
		return -EINVAL;

	*enabled = (strcmp(value, "1") == 0) ? 1 : 0;

	return 0;
}
1999 
2000 #define VIRTIO_ARG_SPEED      "speed"
2001 #define VIRTIO_ARG_VECTORIZED "vectorized"
2002 
2003 static int
2004 link_speed_handler(const char *key __rte_unused,
2005 		const char *value, void *ret_val)
2006 {
2007 	uint32_t val;
2008 	if (!value || !ret_val)
2009 		return -EINVAL;
2010 	val = strtoul(value, NULL, 0);
2011 	/* validate input */
2012 	if (virtio_dev_speed_capa_get(val) == 0)
2013 		return -EINVAL;
2014 	*(uint32_t *)ret_val = val;
2015 
2016 	return 0;
2017 }
2018 
2019 
2020 static int
2021 virtio_dev_devargs_parse(struct rte_devargs *devargs, uint32_t *speed, int *vectorized)
2022 {
2023 	struct rte_kvargs *kvlist;
2024 	int ret = 0;
2025 
2026 	if (devargs == NULL)
2027 		return 0;
2028 
2029 	kvlist = rte_kvargs_parse(devargs->args, NULL);
2030 	if (kvlist == NULL) {
2031 		PMD_INIT_LOG(ERR, "error when parsing param");
2032 		return 0;
2033 	}
2034 
2035 	if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2036 		ret = rte_kvargs_process(kvlist,
2037 					VIRTIO_ARG_SPEED,
2038 					link_speed_handler, speed);
2039 		if (ret < 0) {
2040 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2041 					VIRTIO_ARG_SPEED);
2042 			goto exit;
2043 		}
2044 	}
2045 
2046 	if (vectorized &&
2047 		rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2048 		ret = rte_kvargs_process(kvlist,
2049 				VIRTIO_ARG_VECTORIZED,
2050 				vectorized_check_handler, vectorized);
2051 		if (ret < 0) {
2052 			PMD_INIT_LOG(ERR, "Failed to parse %s",
2053 					VIRTIO_ARG_VECTORIZED);
2054 			goto exit;
2055 		}
2056 	}
2057 
2058 exit:
2059 	rte_kvargs_free(kvlist);
2060 	return ret;
2061 }
2062 
2063 static uint8_t
2064 rx_offload_enabled(struct virtio_hw *hw)
2065 {
2066 	return virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2067 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2068 		virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
2069 }
2070 
2071 static uint8_t
2072 tx_offload_enabled(struct virtio_hw *hw)
2073 {
2074 	return virtio_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2075 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2076 		virtio_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2077 }
2078 
2079 /*
2080  * Configure virtio device
2081  * It returns 0 on success.
2082  */
2083 static int
2084 virtio_dev_configure(struct rte_eth_dev *dev)
2085 {
2086 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2087 	const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2088 	struct virtio_hw *hw = dev->data->dev_private;
2089 	uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2090 		hw->vtnet_hdr_size;
2091 	uint64_t rx_offloads = rxmode->offloads;
2092 	uint64_t tx_offloads = txmode->offloads;
2093 	uint64_t req_features;
2094 	int ret;
2095 
2096 	PMD_INIT_LOG(DEBUG, "configure");
2097 	req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
2098 
2099 	if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2100 		PMD_DRV_LOG(ERR,
2101 			"Unsupported Rx multi queue mode %d",
2102 			rxmode->mq_mode);
2103 		return -EINVAL;
2104 	}
2105 
2106 	if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2107 		PMD_DRV_LOG(ERR,
2108 			"Unsupported Tx multi queue mode %d",
2109 			txmode->mq_mode);
2110 		return -EINVAL;
2111 	}
2112 
2113 	if (dev->data->dev_conf.intr_conf.rxq) {
2114 		ret = virtio_init_device(dev, hw->req_guest_features);
2115 		if (ret < 0)
2116 			return ret;
2117 	}
2118 
2119 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) &&
2120 	    (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len))
2121 		req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
2122 
2123 	if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
2124 		hw->max_rx_pkt_len = rxmode->max_rx_pkt_len;
2125 	else
2126 		hw->max_rx_pkt_len = ether_hdr_len + dev->data->mtu;
2127 
2128 	if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2129 			   DEV_RX_OFFLOAD_TCP_CKSUM))
2130 		req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2131 
2132 	if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2133 		req_features |=
2134 			(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2135 			(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2136 
2137 	if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2138 			   DEV_TX_OFFLOAD_TCP_CKSUM))
2139 		req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2140 
2141 	if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2142 		req_features |=
2143 			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
2144 			(1ULL << VIRTIO_NET_F_HOST_TSO6);
2145 
2146 	/* if request features changed, reinit the device */
2147 	if (req_features != hw->req_guest_features) {
2148 		ret = virtio_init_device(dev, req_features);
2149 		if (ret < 0)
2150 			return ret;
2151 	}
2152 
2153 	if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2154 			    DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2155 		!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2156 		PMD_DRV_LOG(ERR,
2157 			"rx checksum not available on this host");
2158 		return -ENOTSUP;
2159 	}
2160 
2161 	if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2162 		(!virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2163 		 !virtio_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2164 		PMD_DRV_LOG(ERR,
2165 			"Large Receive Offload not available on this host");
2166 		return -ENOTSUP;
2167 	}
2168 
2169 	/* start control queue */
2170 	if (virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2171 		virtio_dev_cq_start(dev);
2172 
2173 	if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2174 		hw->vlan_strip = 1;
2175 
2176 	hw->rx_ol_scatter = (rx_offloads & DEV_RX_OFFLOAD_SCATTER);
2177 
2178 	if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2179 			!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2180 		PMD_DRV_LOG(ERR,
2181 			    "vlan filtering not available on this host");
2182 		return -ENOTSUP;
2183 	}
2184 
2185 	hw->has_tx_offload = tx_offload_enabled(hw);
2186 	hw->has_rx_offload = rx_offload_enabled(hw);
2187 
2188 	if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2189 		/* Enable vector (0) for Link State Intrerrupt */
2190 		if (VIRTIO_OPS(hw)->set_config_irq(hw, 0) ==
2191 				VIRTIO_MSI_NO_VECTOR) {
2192 			PMD_DRV_LOG(ERR, "failed to set config vector");
2193 			return -EBUSY;
2194 		}
2195 
2196 	if (virtio_with_packed_queue(hw)) {
2197 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2198 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2199 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2200 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2201 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2202 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2203 			PMD_DRV_LOG(INFO,
2204 				"disabled packed ring vectorized path for requirements not met");
2205 			hw->use_vec_rx = 0;
2206 			hw->use_vec_tx = 0;
2207 		}
2208 #elif defined(RTE_ARCH_ARM)
2209 		if ((hw->use_vec_rx || hw->use_vec_tx) &&
2210 		    (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2211 		     !virtio_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2212 		     !virtio_with_feature(hw, VIRTIO_F_VERSION_1) ||
2213 		     rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2214 			PMD_DRV_LOG(INFO,
2215 				"disabled packed ring vectorized path for requirements not met");
2216 			hw->use_vec_rx = 0;
2217 			hw->use_vec_tx = 0;
2218 		}
2219 #else
2220 		hw->use_vec_rx = 0;
2221 		hw->use_vec_tx = 0;
2222 #endif
2223 
2224 		if (hw->use_vec_rx) {
2225 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2226 				PMD_DRV_LOG(INFO,
2227 					"disabled packed ring vectorized rx for mrg_rxbuf enabled");
2228 				hw->use_vec_rx = 0;
2229 			}
2230 
2231 			if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2232 				PMD_DRV_LOG(INFO,
2233 					"disabled packed ring vectorized rx for TCP_LRO enabled");
2234 				hw->use_vec_rx = 0;
2235 			}
2236 		}
2237 	} else {
2238 		if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2239 			hw->use_inorder_tx = 1;
2240 			hw->use_inorder_rx = 1;
2241 			hw->use_vec_rx = 0;
2242 		}
2243 
2244 		if (hw->use_vec_rx) {
2245 #if defined RTE_ARCH_ARM
2246 			if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2247 				PMD_DRV_LOG(INFO,
2248 					"disabled split ring vectorized path for requirement not met");
2249 				hw->use_vec_rx = 0;
2250 			}
2251 #endif
2252 			if (virtio_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2253 				PMD_DRV_LOG(INFO,
2254 					"disabled split ring vectorized rx for mrg_rxbuf enabled");
2255 				hw->use_vec_rx = 0;
2256 			}
2257 
2258 			if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2259 					   DEV_RX_OFFLOAD_TCP_CKSUM |
2260 					   DEV_RX_OFFLOAD_TCP_LRO |
2261 					   DEV_RX_OFFLOAD_VLAN_STRIP)) {
2262 				PMD_DRV_LOG(INFO,
2263 					"disabled split ring vectorized rx for offloading enabled");
2264 				hw->use_vec_rx = 0;
2265 			}
2266 
2267 			if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2268 				PMD_DRV_LOG(INFO,
2269 					"disabled split ring vectorized rx, max SIMD bitwidth too low");
2270 				hw->use_vec_rx = 0;
2271 			}
2272 		}
2273 	}
2274 
2275 	return 0;
2276 }
2277 
2278 
2279 static int
2280 virtio_dev_start(struct rte_eth_dev *dev)
2281 {
2282 	uint16_t nb_queues, i;
2283 	struct virtqueue *vq;
2284 	struct virtio_hw *hw = dev->data->dev_private;
2285 	int ret;
2286 
2287 	/* Finish the initialization of the queues */
2288 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2289 		ret = virtio_dev_rx_queue_setup_finish(dev, i);
2290 		if (ret < 0)
2291 			return ret;
2292 	}
2293 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2294 		ret = virtio_dev_tx_queue_setup_finish(dev, i);
2295 		if (ret < 0)
2296 			return ret;
2297 	}
2298 
2299 	/* check if lsc interrupt feature is enabled */
2300 	if (dev->data->dev_conf.intr_conf.lsc) {
2301 		if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2302 			PMD_DRV_LOG(ERR, "link status not supported by host");
2303 			return -ENOTSUP;
2304 		}
2305 	}
2306 
2307 	/* Enable uio/vfio intr/eventfd mapping: althrough we already did that
2308 	 * in device configure, but it could be unmapped  when device is
2309 	 * stopped.
2310 	 */
2311 	if (dev->data->dev_conf.intr_conf.lsc ||
2312 	    dev->data->dev_conf.intr_conf.rxq) {
2313 		virtio_intr_disable(dev);
2314 
2315 		/* Setup interrupt callback  */
2316 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2317 			rte_intr_callback_register(dev->intr_handle,
2318 						   virtio_interrupt_handler,
2319 						   dev);
2320 
2321 		if (virtio_intr_enable(dev) < 0) {
2322 			PMD_DRV_LOG(ERR, "interrupt enable failed");
2323 			return -EIO;
2324 		}
2325 	}
2326 
2327 	/*Notify the backend
2328 	 *Otherwise the tap backend might already stop its queue due to fullness.
2329 	 *vhost backend will have no chance to be waked up
2330 	 */
2331 	nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2332 	if (hw->max_queue_pairs > 1) {
2333 		if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2334 			return -EINVAL;
2335 	}
2336 
2337 	PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2338 
2339 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2340 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2341 		/* Flush the old packets */
2342 		virtqueue_rxvq_flush(vq);
2343 		virtqueue_notify(vq);
2344 	}
2345 
2346 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2347 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2348 		virtqueue_notify(vq);
2349 	}
2350 
2351 	PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
2352 
2353 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
2354 		vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
2355 		VIRTQUEUE_DUMP(vq);
2356 	}
2357 
2358 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
2359 		vq = virtnet_txq_to_vq(dev->data->tx_queues[i]);
2360 		VIRTQUEUE_DUMP(vq);
2361 	}
2362 
2363 	set_rxtx_funcs(dev);
2364 	hw->started = 1;
2365 
2366 	/* Initialize Link state */
2367 	virtio_dev_link_update(dev, 0);
2368 
2369 	return 0;
2370 }
2371 
2372 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2373 {
2374 	struct virtio_hw *hw = dev->data->dev_private;
2375 	uint16_t nr_vq = virtio_get_nr_vq(hw);
2376 	const char *type __rte_unused;
2377 	unsigned int i, mbuf_num = 0;
2378 	struct virtqueue *vq;
2379 	struct rte_mbuf *buf;
2380 	int queue_type;
2381 
2382 	if (hw->vqs == NULL)
2383 		return;
2384 
2385 	for (i = 0; i < nr_vq; i++) {
2386 		vq = hw->vqs[i];
2387 		if (!vq)
2388 			continue;
2389 
2390 		queue_type = virtio_get_queue_type(hw, i);
2391 		if (queue_type == VTNET_RQ)
2392 			type = "rxq";
2393 		else if (queue_type == VTNET_TQ)
2394 			type = "txq";
2395 		else
2396 			continue;
2397 
2398 		PMD_INIT_LOG(DEBUG,
2399 			"Before freeing %s[%d] used and unused buf",
2400 			type, i);
2401 		VIRTQUEUE_DUMP(vq);
2402 
2403 		while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2404 			rte_pktmbuf_free(buf);
2405 			mbuf_num++;
2406 		}
2407 
2408 		PMD_INIT_LOG(DEBUG,
2409 			"After freeing %s[%d] used and unused buf",
2410 			type, i);
2411 		VIRTQUEUE_DUMP(vq);
2412 	}
2413 
2414 	PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2415 }
2416 
2417 static void
2418 virtio_tx_completed_cleanup(struct rte_eth_dev *dev)
2419 {
2420 	struct virtio_hw *hw = dev->data->dev_private;
2421 	struct virtqueue *vq;
2422 	int qidx;
2423 	void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used);
2424 
2425 	if (virtio_with_packed_queue(hw)) {
2426 		if (hw->use_vec_tx)
2427 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2428 		else if (virtio_with_feature(hw, VIRTIO_F_IN_ORDER))
2429 			xmit_cleanup = &virtio_xmit_cleanup_inorder_packed;
2430 		else
2431 			xmit_cleanup = &virtio_xmit_cleanup_normal_packed;
2432 	} else {
2433 		if (hw->use_inorder_tx)
2434 			xmit_cleanup = &virtio_xmit_cleanup_inorder;
2435 		else
2436 			xmit_cleanup = &virtio_xmit_cleanup;
2437 	}
2438 
2439 	for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) {
2440 		vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX];
2441 		if (vq != NULL)
2442 			xmit_cleanup(vq, virtqueue_nused(vq));
2443 	}
2444 }
2445 
2446 /*
2447  * Stop device: disable interrupt and mark link down
2448  */
2449 int
2450 virtio_dev_stop(struct rte_eth_dev *dev)
2451 {
2452 	struct virtio_hw *hw = dev->data->dev_private;
2453 	struct rte_eth_link link;
2454 	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2455 
2456 	PMD_INIT_LOG(DEBUG, "stop");
2457 	dev->data->dev_started = 0;
2458 
2459 	rte_spinlock_lock(&hw->state_lock);
2460 	if (!hw->started)
2461 		goto out_unlock;
2462 	hw->started = 0;
2463 
2464 	virtio_tx_completed_cleanup(dev);
2465 
2466 	if (intr_conf->lsc || intr_conf->rxq) {
2467 		virtio_intr_disable(dev);
2468 
2469 		/* Reset interrupt callback  */
2470 		if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2471 			rte_intr_callback_unregister(dev->intr_handle,
2472 						     virtio_interrupt_handler,
2473 						     dev);
2474 		}
2475 	}
2476 
2477 	memset(&link, 0, sizeof(link));
2478 	rte_eth_linkstatus_set(dev, &link);
2479 out_unlock:
2480 	rte_spinlock_unlock(&hw->state_lock);
2481 
2482 	return 0;
2483 }
2484 
2485 static int
2486 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2487 {
2488 	struct rte_eth_link link;
2489 	uint16_t status;
2490 	struct virtio_hw *hw = dev->data->dev_private;
2491 
2492 	memset(&link, 0, sizeof(link));
2493 	link.link_duplex = hw->duplex;
2494 	link.link_speed  = hw->speed;
2495 	link.link_autoneg = ETH_LINK_AUTONEG;
2496 
2497 	if (!hw->started) {
2498 		link.link_status = ETH_LINK_DOWN;
2499 		link.link_speed = ETH_SPEED_NUM_NONE;
2500 	} else if (virtio_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2501 		PMD_INIT_LOG(DEBUG, "Get link status from hw");
2502 		virtio_read_dev_config(hw,
2503 				offsetof(struct virtio_net_config, status),
2504 				&status, sizeof(status));
2505 		if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2506 			link.link_status = ETH_LINK_DOWN;
2507 			link.link_speed = ETH_SPEED_NUM_NONE;
2508 			PMD_INIT_LOG(DEBUG, "Port %d is down",
2509 				     dev->data->port_id);
2510 		} else {
2511 			link.link_status = ETH_LINK_UP;
2512 			PMD_INIT_LOG(DEBUG, "Port %d is up",
2513 				     dev->data->port_id);
2514 		}
2515 	} else {
2516 		link.link_status = ETH_LINK_UP;
2517 	}
2518 
2519 	return rte_eth_linkstatus_set(dev, &link);
2520 }
2521 
2522 static int
2523 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2524 {
2525 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2526 	struct virtio_hw *hw = dev->data->dev_private;
2527 	uint64_t offloads = rxmode->offloads;
2528 
2529 	if (mask & ETH_VLAN_FILTER_MASK) {
2530 		if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2531 				!virtio_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2532 
2533 			PMD_DRV_LOG(NOTICE,
2534 				"vlan filtering not available on this host");
2535 
2536 			return -ENOTSUP;
2537 		}
2538 	}
2539 
2540 	if (mask & ETH_VLAN_STRIP_MASK)
2541 		hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2542 
2543 	return 0;
2544 }
2545 
2546 static int
2547 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2548 {
2549 	uint64_t tso_mask, host_features;
2550 	struct virtio_hw *hw = dev->data->dev_private;
2551 	dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2552 
2553 	dev_info->max_rx_queues =
2554 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2555 	dev_info->max_tx_queues =
2556 		RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2557 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2558 	dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2559 	dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2560 	dev_info->max_mtu = hw->max_mtu;
2561 
2562 	host_features = VIRTIO_OPS(hw)->get_features(hw);
2563 	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2564 	dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2565 	if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
2566 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SCATTER;
2567 	if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2568 		dev_info->rx_offload_capa |=
2569 			DEV_RX_OFFLOAD_TCP_CKSUM |
2570 			DEV_RX_OFFLOAD_UDP_CKSUM;
2571 	}
2572 	if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2573 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2574 	tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2575 		(1ULL << VIRTIO_NET_F_GUEST_TSO6);
2576 	if ((host_features & tso_mask) == tso_mask)
2577 		dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2578 
2579 	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2580 				    DEV_TX_OFFLOAD_VLAN_INSERT;
2581 	if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2582 		dev_info->tx_offload_capa |=
2583 			DEV_TX_OFFLOAD_UDP_CKSUM |
2584 			DEV_TX_OFFLOAD_TCP_CKSUM;
2585 	}
2586 	tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2587 		(1ULL << VIRTIO_NET_F_HOST_TSO6);
2588 	if ((host_features & tso_mask) == tso_mask)
2589 		dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2590 
2591 	if (host_features & (1ULL << VIRTIO_F_RING_PACKED)) {
2592 		/*
2593 		 * According to 2.7 Packed Virtqueues,
2594 		 * 2.7.10.1 Structure Size and Alignment:
2595 		 * The Queue Size value does not have to be a power of 2.
2596 		 */
2597 		dev_info->rx_desc_lim.nb_max = UINT16_MAX;
2598 		dev_info->tx_desc_lim.nb_max = UINT16_MAX;
2599 	} else {
2600 		/*
2601 		 * According to 2.6 Split Virtqueues:
2602 		 * Queue Size value is always a power of 2. The maximum Queue
2603 		 * Size value is 32768.
2604 		 */
2605 		dev_info->rx_desc_lim.nb_max = 32768;
2606 		dev_info->tx_desc_lim.nb_max = 32768;
2607 	}
2608 	/*
2609 	 * Actual minimum is not the same for virtqueues of different kinds,
2610 	 * but to avoid tangling the code with separate branches, rely on
2611 	 * default thresholds since desc number must be at least of their size.
2612 	 */
2613 	dev_info->rx_desc_lim.nb_min = RTE_MAX(DEFAULT_RX_FREE_THRESH,
2614 					       RTE_VIRTIO_VPMD_RX_REARM_THRESH);
2615 	dev_info->tx_desc_lim.nb_min = DEFAULT_TX_FREE_THRESH;
2616 	dev_info->rx_desc_lim.nb_align = 1;
2617 	dev_info->tx_desc_lim.nb_align = 1;
2618 
2619 	return 0;
2620 }
2621 
2622 /*
2623  * It enables testpmd to collect per queue stats.
2624  */
2625 static int
2626 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2627 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2628 __rte_unused uint8_t is_rx)
2629 {
2630 	return 0;
2631 }
2632 
/*
 * Register the two log types of this driver, virtio_logtype_init (suffix
 * "init") and virtio_logtype_driver (suffix "driver"). NOTICE is the level
 * handed to the registration macro, presumably the default log level.
 */
RTE_LOG_REGISTER_SUFFIX(virtio_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(virtio_logtype_driver, driver, NOTICE);
2635