1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <[email protected]>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO network devices. */
30
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/eventhandler.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sockio.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/module.h>
40 #include <sys/msan.h>
41 #include <sys/socket.h>
42 #include <sys/sysctl.h>
43 #include <sys/random.h>
44 #include <sys/sglist.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/taskqueue.h>
48 #include <sys/smp.h>
49 #include <machine/smp.h>
50
51 #include <vm/uma.h>
52
53 #include <net/debugnet.h>
54 #include <net/ethernet.h>
55 #include <net/pfil.h>
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_types.h>
61 #include <net/if_media.h>
62 #include <net/if_vlan_var.h>
63
64 #include <net/bpf.h>
65
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #include <netinet/udp.h>
72 #include <netinet/tcp.h>
73 #include <netinet/tcp_lro.h>
74
75 #include <machine/bus.h>
76 #include <machine/resource.h>
77 #include <sys/bus.h>
78 #include <sys/rman.h>
79
80 #include <dev/virtio/virtio.h>
81 #include <dev/virtio/virtqueue.h>
82 #include <dev/virtio/network/virtio_net.h>
83 #include <dev/virtio/network/if_vtnetvar.h>
84 #include "virtio_if.h"
85
86 #include "opt_inet.h"
87 #include "opt_inet6.h"
88
89 #if defined(INET) || defined(INET6)
90 #include <machine/in_cksum.h>
91 #endif
92
93 #ifdef __NO_STRICT_ALIGNMENT
94 #define VTNET_ETHER_ALIGN 0
95 #else /* Strict alignment */
96 #define VTNET_ETHER_ALIGN ETHER_ALIGN
97 #endif
98
99 static int vtnet_modevent(module_t, int, void *);
100
101 static int vtnet_probe(device_t);
102 static int vtnet_attach(device_t);
103 static int vtnet_detach(device_t);
104 static int vtnet_suspend(device_t);
105 static int vtnet_resume(device_t);
106 static int vtnet_shutdown(device_t);
107 static int vtnet_attach_completed(device_t);
108 static int vtnet_config_change(device_t);
109
110 static int vtnet_negotiate_features(struct vtnet_softc *);
111 static int vtnet_setup_features(struct vtnet_softc *);
112 static int vtnet_init_rxq(struct vtnet_softc *, int);
113 static int vtnet_init_txq(struct vtnet_softc *, int);
114 static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
115 static void vtnet_free_rxtx_queues(struct vtnet_softc *);
116 static int vtnet_alloc_rx_filters(struct vtnet_softc *);
117 static void vtnet_free_rx_filters(struct vtnet_softc *);
118 static int vtnet_alloc_virtqueues(struct vtnet_softc *);
119 static void vtnet_alloc_interface(struct vtnet_softc *);
120 static int vtnet_setup_interface(struct vtnet_softc *);
121 static int vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
122 static int vtnet_ioctl_ifflags(struct vtnet_softc *);
123 static int vtnet_ioctl_multi(struct vtnet_softc *);
124 static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
125 static int vtnet_ioctl(if_t, u_long, caddr_t);
126 static uint64_t vtnet_get_counter(if_t, ift_counter);
127
128 static int vtnet_rxq_populate(struct vtnet_rxq *);
129 static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
130 static struct mbuf *
131 vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
132 static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
133 struct mbuf *, int);
134 static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
135 static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
136 static int vtnet_rxq_new_buf(struct vtnet_rxq *);
137 static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
138 uint16_t, int, struct virtio_net_hdr *);
139 static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
140 uint16_t, int, struct virtio_net_hdr *);
141 static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
142 struct virtio_net_hdr *);
143 static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
144 static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
145 static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
146 static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
147 struct virtio_net_hdr *);
148 static int vtnet_rxq_eof(struct vtnet_rxq *);
149 static void vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
150 static void vtnet_rx_vq_intr(void *);
151 static void vtnet_rxq_tq_intr(void *, int);
152
153 static int vtnet_txq_intr_threshold(struct vtnet_txq *);
154 static int vtnet_txq_below_threshold(struct vtnet_txq *);
155 static int vtnet_txq_notify(struct vtnet_txq *);
156 static void vtnet_txq_free_mbufs(struct vtnet_txq *);
157 static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
158 int *, int *, int *);
159 static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
160 int, struct virtio_net_hdr *);
161 static struct mbuf *
162 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
163 struct virtio_net_hdr *);
164 static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
165 struct vtnet_tx_header *);
166 static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
167 #ifdef VTNET_LEGACY_TX
168 static void vtnet_start_locked(struct vtnet_txq *, if_t);
169 static void vtnet_start(if_t);
170 #else
171 static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
172 static int vtnet_txq_mq_start(if_t, struct mbuf *);
173 static void vtnet_txq_tq_deferred(void *, int);
174 #endif
175 static void vtnet_txq_start(struct vtnet_txq *);
176 static void vtnet_txq_tq_intr(void *, int);
177 static int vtnet_txq_eof(struct vtnet_txq *);
178 static void vtnet_tx_vq_intr(void *);
179 static void vtnet_tx_start_all(struct vtnet_softc *);
180
181 #ifndef VTNET_LEGACY_TX
182 static void vtnet_qflush(if_t);
183 #endif
184
185 static int vtnet_watchdog(struct vtnet_txq *);
186 static void vtnet_accum_stats(struct vtnet_softc *,
187 struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
188 static void vtnet_tick(void *);
189
190 static void vtnet_start_taskqueues(struct vtnet_softc *);
191 static void vtnet_free_taskqueues(struct vtnet_softc *);
192 static void vtnet_drain_taskqueues(struct vtnet_softc *);
193
194 static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
195 static void vtnet_stop_rendezvous(struct vtnet_softc *);
196 static void vtnet_stop(struct vtnet_softc *);
197 static int vtnet_virtio_reinit(struct vtnet_softc *);
198 static void vtnet_init_rx_filters(struct vtnet_softc *);
199 static int vtnet_init_rx_queues(struct vtnet_softc *);
200 static int vtnet_init_tx_queues(struct vtnet_softc *);
201 static int vtnet_init_rxtx_queues(struct vtnet_softc *);
202 static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
203 static void vtnet_update_rx_offloads(struct vtnet_softc *);
204 static int vtnet_reinit(struct vtnet_softc *);
205 static void vtnet_init_locked(struct vtnet_softc *, int);
206 static void vtnet_init(void *);
207
208 static void vtnet_free_ctrl_vq(struct vtnet_softc *);
209 static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
210 struct sglist *, int, int);
211 static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
212 static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
213 static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
214 static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
215 static int vtnet_set_promisc(struct vtnet_softc *, bool);
216 static int vtnet_set_allmulti(struct vtnet_softc *, bool);
217 static void vtnet_rx_filter(struct vtnet_softc *);
218 static void vtnet_rx_filter_mac(struct vtnet_softc *);
219 static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
220 static void vtnet_rx_filter_vlan(struct vtnet_softc *);
221 static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
222 static void vtnet_register_vlan(void *, if_t, uint16_t);
223 static void vtnet_unregister_vlan(void *, if_t, uint16_t);
224
225 static void vtnet_update_speed_duplex(struct vtnet_softc *);
226 static int vtnet_is_link_up(struct vtnet_softc *);
227 static void vtnet_update_link_status(struct vtnet_softc *);
228 static int vtnet_ifmedia_upd(if_t);
229 static void vtnet_ifmedia_sts(if_t, struct ifmediareq *);
230 static void vtnet_get_macaddr(struct vtnet_softc *);
231 static void vtnet_set_macaddr(struct vtnet_softc *);
232 static void vtnet_attached_set_macaddr(struct vtnet_softc *);
233 static void vtnet_vlan_tag_remove(struct mbuf *);
234 static void vtnet_set_rx_process_limit(struct vtnet_softc *);
235
236 static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
237 struct sysctl_oid_list *, struct vtnet_rxq *);
238 static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
239 struct sysctl_oid_list *, struct vtnet_txq *);
240 static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
241 static void vtnet_load_tunables(struct vtnet_softc *);
242 static void vtnet_setup_sysctl(struct vtnet_softc *);
243
244 static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
245 static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
246 static int vtnet_txq_enable_intr(struct vtnet_txq *);
247 static void vtnet_txq_disable_intr(struct vtnet_txq *);
248 static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
249 static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
250 static void vtnet_enable_interrupts(struct vtnet_softc *);
251 static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
252 static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
253 static void vtnet_disable_interrupts(struct vtnet_softc *);
254
255 static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
256
257 DEBUGNET_DEFINE(vtnet);
258
259 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val)
260 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val)
261 #define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val)
262 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val)
263 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val)
264 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val)
265
266 /* Tunables. */
267 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
268 "VirtIO Net driver parameters");
269
270 static int vtnet_csum_disable = 0;
271 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
272 &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
273
274 static int vtnet_fixup_needs_csum = 0;
275 SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
276 &vtnet_fixup_needs_csum, 0,
277 "Calculate valid checksum for NEEDS_CSUM packets");
278
279 static int vtnet_tso_disable = 0;
280 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
281 &vtnet_tso_disable, 0, "Disables TSO");
282
283 static int vtnet_lro_disable = 0;
284 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
285 &vtnet_lro_disable, 0, "Disables hardware LRO");
286
287 static int vtnet_mq_disable = 0;
288 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
289 &vtnet_mq_disable, 0, "Disables multiqueue support");
290
291 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
292 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
293 &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
294
295 static int vtnet_tso_maxlen = IP_MAXPACKET;
296 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
297 &vtnet_tso_maxlen, 0, "TSO burst limit");
298
299 static int vtnet_rx_process_limit = 1024;
300 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
301 &vtnet_rx_process_limit, 0,
302 "Number of RX segments processed in one pass");
303
304 static int vtnet_lro_entry_count = 128;
305 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
306 &vtnet_lro_entry_count, 0, "Software LRO entry count");
307
308 /* A nonzero value enables sorted LRO and sets the depth of the mbuf queue. */
309 static int vtnet_lro_mbufq_depth = 0;
310 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
311 &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
312
313 static uma_zone_t vtnet_tx_header_zone;
314
315 static struct virtio_feature_desc vtnet_feature_desc[] = {
316 { VIRTIO_NET_F_CSUM, "TxChecksum" },
317 { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" },
318 { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" },
319 { VIRTIO_NET_F_MAC, "MAC" },
320 { VIRTIO_NET_F_GSO, "TxGSO" },
321 { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" },
322 { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" },
323 { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" },
324 { VIRTIO_NET_F_GUEST_UFO, "RxUFO" },
325 { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" },
326 { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" },
327 { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" },
328 { VIRTIO_NET_F_HOST_UFO, "TxUFO" },
329 { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" },
330 { VIRTIO_NET_F_STATUS, "Status" },
331 { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" },
332 { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" },
333 { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" },
334 { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" },
335 { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
336 { VIRTIO_NET_F_MQ, "Multiqueue" },
337 { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" },
338 { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" },
339
340 { 0, NULL }
341 };
342
343 static device_method_t vtnet_methods[] = {
344 /* Device methods. */
345 DEVMETHOD(device_probe, vtnet_probe),
346 DEVMETHOD(device_attach, vtnet_attach),
347 DEVMETHOD(device_detach, vtnet_detach),
348 DEVMETHOD(device_suspend, vtnet_suspend),
349 DEVMETHOD(device_resume, vtnet_resume),
350 DEVMETHOD(device_shutdown, vtnet_shutdown),
351
352 /* VirtIO methods. */
353 DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
354 DEVMETHOD(virtio_config_change, vtnet_config_change),
355
356 DEVMETHOD_END
357 };
358
359 #ifdef DEV_NETMAP
360 #include <dev/netmap/if_vtnet_netmap.h>
361 #endif
362
363 static driver_t vtnet_driver = {
364 .name = "vtnet",
365 .methods = vtnet_methods,
366 .size = sizeof(struct vtnet_softc)
367 };
368 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
369 MODULE_VERSION(vtnet, 1);
370 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
371 #ifdef DEV_NETMAP
372 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
373 #endif
374
375 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
376
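/*
 * Module event handler. On first load, create the UMA zone used for
 * transmit headers; on final unload, destroy it. MOD_QUIESCE fails with
 * EBUSY while any transmit headers are still outstanding.
 */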
377 static int
378 vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
379 {
380 int error = 0;
381 static int loaded = 0;
382
383 switch (type) {
384 case MOD_LOAD:
385 if (loaded++ == 0) {
386 vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
387 sizeof(struct vtnet_tx_header),
388 NULL, NULL, NULL, NULL, 0, 0);
389 #ifdef DEBUGNET
390 /*
391 * We need to allocate from this zone in the transmit path, so ensure
392 * that we have at least one item per header available.
393 * XXX add a separate zone like we do for mbufs? otherwise we may alloc
394 * buckets
395 */
396 uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
397 uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
398 #endif
399 }
400 break;
401 case MOD_QUIESCE:
402 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
403 error = EBUSY;
404 break;
405 case MOD_UNLOAD:
406 if (--loaded == 0) {
407 uma_zdestroy(vtnet_tx_header_zone);
408 vtnet_tx_header_zone = NULL;
409 }
410 break;
411 case MOD_SHUTDOWN:
412 break;
413 default:
414 error = EOPNOTSUPP;
415 break;
416 }
417
418 return (error);
419 }
420
421 static int
422 vtnet_probe(device_t dev)
423 {
424 return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
425 }
426
427 static int
428 vtnet_attach(device_t dev)
429 {
430 struct vtnet_softc *sc;
431 int error;
432
433 sc = device_get_softc(dev);
434 sc->vtnet_dev = dev;
435 virtio_set_feature_desc(dev, vtnet_feature_desc);
436
437 VTNET_CORE_LOCK_INIT(sc);
438 callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
439 vtnet_load_tunables(sc);
440
441 vtnet_alloc_interface(sc);
442 vtnet_setup_sysctl(sc);
443
444 error = vtnet_setup_features(sc);
445 if (error) {
446 device_printf(dev, "cannot setup features\n");
447 goto fail;
448 }
449
450 error = vtnet_alloc_rx_filters(sc);
451 if (error) {
452 device_printf(dev, "cannot allocate Rx filters\n");
453 goto fail;
454 }
455
456 error = vtnet_alloc_rxtx_queues(sc);
457 if (error) {
458 device_printf(dev, "cannot allocate queues\n");
459 goto fail;
460 }
461
462 error = vtnet_alloc_virtqueues(sc);
463 if (error) {
464 device_printf(dev, "cannot allocate virtqueues\n");
465 goto fail;
466 }
467
468 error = vtnet_setup_interface(sc);
469 if (error) {
470 device_printf(dev, "cannot setup interface\n");
471 goto fail;
472 }
473
474 error = virtio_setup_intr(dev, INTR_TYPE_NET);
475 if (error) {
476 device_printf(dev, "cannot setup interrupts\n");
477 ether_ifdetach(sc->vtnet_ifp);
478 goto fail;
479 }
480
481 #ifdef DEV_NETMAP
482 vtnet_netmap_attach(sc);
483 #endif
484 vtnet_start_taskqueues(sc);
485
486 fail:
487 if (error)
488 vtnet_detach(dev);
489
490 return (error);
491 }
492
493 static int
494 vtnet_detach(device_t dev)
495 {
496 struct vtnet_softc *sc;
497 if_t ifp;
498
499 sc = device_get_softc(dev);
500 ifp = sc->vtnet_ifp;
501
502 if (device_is_attached(dev)) {
503 VTNET_CORE_LOCK(sc);
504 vtnet_stop(sc);
505 VTNET_CORE_UNLOCK(sc);
506
507 callout_drain(&sc->vtnet_tick_ch);
508 vtnet_drain_taskqueues(sc);
509
510 ether_ifdetach(ifp);
511 }
512
513 #ifdef DEV_NETMAP
514 netmap_detach(ifp);
515 #endif
516
517 if (sc->vtnet_pfil != NULL) {
518 pfil_head_unregister(sc->vtnet_pfil);
519 sc->vtnet_pfil = NULL;
520 }
521
522 vtnet_free_taskqueues(sc);
523
524 if (sc->vtnet_vlan_attach != NULL) {
525 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
526 sc->vtnet_vlan_attach = NULL;
527 }
528 if (sc->vtnet_vlan_detach != NULL) {
529 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
530 sc->vtnet_vlan_detach = NULL;
531 }
532
533 ifmedia_removeall(&sc->vtnet_media);
534
535 if (ifp != NULL) {
536 if_free(ifp);
537 sc->vtnet_ifp = NULL;
538 }
539
540 vtnet_free_rxtx_queues(sc);
541 vtnet_free_rx_filters(sc);
542
543 if (sc->vtnet_ctrl_vq != NULL)
544 vtnet_free_ctrl_vq(sc);
545
546 VTNET_CORE_LOCK_DESTROY(sc);
547
548 return (0);
549 }
550
551 static int
552 vtnet_suspend(device_t dev)
553 {
554 struct vtnet_softc *sc;
555
556 sc = device_get_softc(dev);
557
558 VTNET_CORE_LOCK(sc);
559 vtnet_stop(sc);
560 sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
561 VTNET_CORE_UNLOCK(sc);
562
563 return (0);
564 }
565
566 static int
567 vtnet_resume(device_t dev)
568 {
569 struct vtnet_softc *sc;
570 if_t ifp;
571
572 sc = device_get_softc(dev);
573 ifp = sc->vtnet_ifp;
574
575 VTNET_CORE_LOCK(sc);
576 if (if_getflags(ifp) & IFF_UP)
577 vtnet_init_locked(sc, 0);
578 sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
579 VTNET_CORE_UNLOCK(sc);
580
581 return (0);
582 }
583
584 static int
585 vtnet_shutdown(device_t dev)
586 {
587 /*
588 * Suspend already does all of what we need to
589 * do here; we just never expect to be resumed.
590 */
591 return (vtnet_suspend(dev));
592 }
593
594 static int
595 vtnet_attach_completed(device_t dev)
596 {
597 struct vtnet_softc *sc;
598
599 sc = device_get_softc(dev);
600
601 VTNET_CORE_LOCK(sc);
602 vtnet_attached_set_macaddr(sc);
603 VTNET_CORE_UNLOCK(sc);
604
605 return (0);
606 }
607
608 static int
609 vtnet_config_change(device_t dev)
610 {
611 struct vtnet_softc *sc;
612
613 sc = device_get_softc(dev);
614
615 VTNET_CORE_LOCK(sc);
616 vtnet_update_link_status(sc);
617 if (sc->vtnet_link_active != 0)
618 vtnet_tx_start_all(sc);
619 VTNET_CORE_UNLOCK(sc);
620
621 return (0);
622 }
623
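/*
 * Negotiate features with the host. Start from the modern or legacy feature
 * mask, mask out features disabled by tunables, and renegotiate without the
 * MTU, multiqueue, or LRO features if the device configuration for them is
 * unusable.
 */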
624 static int
625 vtnet_negotiate_features(struct vtnet_softc *sc)
626 {
627 device_t dev;
628 uint64_t features, negotiated_features;
629 int no_csum;
630
631 dev = sc->vtnet_dev;
632 features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
633 VTNET_LEGACY_FEATURES;
634
635 /*
636 * TSO and LRO are only available when their corresponding checksum
637 * offload feature is also negotiated.
638 */
639 no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
640 if (no_csum)
641 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
642 if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
643 features &= ~VTNET_TSO_FEATURES;
644 if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
645 features &= ~VTNET_LRO_FEATURES;
646
647 #ifndef VTNET_LEGACY_TX
648 if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
649 features &= ~VIRTIO_NET_F_MQ;
650 #else
651 features &= ~VIRTIO_NET_F_MQ;
652 #endif
653
654 negotiated_features = virtio_negotiate_features(dev, features);
655
656 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
657 uint16_t mtu;
658
659 mtu = virtio_read_dev_config_2(dev,
660 offsetof(struct virtio_net_config, mtu));
661 if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
662 device_printf(dev, "Invalid MTU value: %d. "
663 "MTU feature disabled.\n", mtu);
664 features &= ~VIRTIO_NET_F_MTU;
665 negotiated_features =
666 virtio_negotiate_features(dev, features);
667 }
668 }
669
670 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
671 uint16_t npairs;
672
673 npairs = virtio_read_dev_config_2(dev,
674 offsetof(struct virtio_net_config, max_virtqueue_pairs));
675 if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
676 npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
677 device_printf(dev, "Invalid max_virtqueue_pairs value: "
678 "%d. Multiqueue feature disabled.\n", npairs);
679 features &= ~VIRTIO_NET_F_MQ;
680 negotiated_features =
681 virtio_negotiate_features(dev, features);
682 }
683 }
684
685 if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
686 virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
687 /*
688 * LRO without mergeable buffers requires special care. This
689 * is not ideal because every receive buffer must be large
690 * enough to hold the maximum TCP packet, the Ethernet header,
691 * and the virtio-net header. This requires up to 34 descriptors with
692 * MCLBYTES clusters. If we do not have indirect descriptors,
693 * LRO is disabled since the virtqueue will not contain very
694 * many receive buffers.
695 */
696 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
697 device_printf(dev,
698 "Host LRO disabled since both mergeable buffers "
699 "and indirect descriptors were not negotiated\n");
700 features &= ~VTNET_LRO_FEATURES;
701 negotiated_features =
702 virtio_negotiate_features(dev, features);
703 } else
704 sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
705 }
706
707 sc->vtnet_features = negotiated_features;
708 sc->vtnet_negotiated_features = negotiated_features;
709
710 return (virtio_finalize_features(dev));
711 }
712
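/*
 * Derive driver state from the negotiated features: header size, receive
 * and transmit segment counts, maximum MTU, control virtqueue related
 * flags, and the number of requested virtqueue pairs.
 */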
713 static int
714 vtnet_setup_features(struct vtnet_softc *sc)
715 {
716 device_t dev;
717 int error;
718
719 dev = sc->vtnet_dev;
720
721 error = vtnet_negotiate_features(sc);
722 if (error)
723 return (error);
724
725 if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
726 sc->vtnet_flags |= VTNET_FLAG_MODERN;
727 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
728 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
729 if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
730 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
731
732 if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
733 /* This feature should always be negotiated. */
734 sc->vtnet_flags |= VTNET_FLAG_MAC;
735 }
736
737 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
738 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
739 offsetof(struct virtio_net_config, mtu));
740 } else
741 sc->vtnet_max_mtu = VTNET_MAX_MTU;
742
743 if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
744 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
745 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
746 } else if (vtnet_modern(sc)) {
747 /* This is identical to the mergeable header. */
748 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
749 } else
750 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
751
752 if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
753 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
754 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
755 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
756 else
757 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
758
759 /*
760 * Favor "hardware" LRO if negotiated, but support software LRO as
761 * a fallback; there is usually little benefit (or worse) with both.
762 */
763 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
764 virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
765 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
766
767 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
768 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
769 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
770 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
771 else
772 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
773
774 sc->vtnet_req_vq_pairs = 1;
775 sc->vtnet_max_vq_pairs = 1;
776
777 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
778 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
779
780 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
781 sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
782 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
783 sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
784 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
785 sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
786
787 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
788 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
789 offsetof(struct virtio_net_config,
790 max_virtqueue_pairs));
791 }
792 }
793
794 if (sc->vtnet_max_vq_pairs > 1) {
795 int req;
796
797 /*
798 * Limit the maximum number of requested queue pairs to the
799 * number of CPUs and the configured maximum.
800 */
801 req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
802 if (req < 0)
803 req = 1;
804 if (req == 0)
805 req = mp_ncpus;
806 if (req > sc->vtnet_max_vq_pairs)
807 req = sc->vtnet_max_vq_pairs;
808 if (req > mp_ncpus)
809 req = mp_ncpus;
810 if (req > 1) {
811 sc->vtnet_req_vq_pairs = req;
812 sc->vtnet_flags |= VTNET_FLAG_MQ;
813 }
814 }
815
816 return (0);
817 }
818
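/*
 * Initialize one receive queue: its mutex, scatter/gather list, optional
 * software LRO context, and interrupt taskqueue.
 */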
819 static int
820 vtnet_init_rxq(struct vtnet_softc *sc, int id)
821 {
822 struct vtnet_rxq *rxq;
823
824 rxq = &sc->vtnet_rxqs[id];
825
826 snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
827 device_get_nameunit(sc->vtnet_dev), id);
828 mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
829
830 rxq->vtnrx_sc = sc;
831 rxq->vtnrx_id = id;
832
833 rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
834 if (rxq->vtnrx_sg == NULL)
835 return (ENOMEM);
836
837 #if defined(INET) || defined(INET6)
838 if (vtnet_software_lro(sc)) {
839 if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
840 sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
841 return (ENOMEM);
842 }
843 #endif
844
845 NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
846 rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
847 taskqueue_thread_enqueue, &rxq->vtnrx_tq);
848
849 return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
850 }
851
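/*
 * Initialize one transmit queue: its mutex, scatter/gather list, buf_ring
 * (when not using the legacy if_start interface), and taskqueues.
 */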
852 static int
853 vtnet_init_txq(struct vtnet_softc *sc, int id)
854 {
855 struct vtnet_txq *txq;
856
857 txq = &sc->vtnet_txqs[id];
858
859 snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
860 device_get_nameunit(sc->vtnet_dev), id);
861 mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
862
863 txq->vtntx_sc = sc;
864 txq->vtntx_id = id;
865
866 txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
867 if (txq->vtntx_sg == NULL)
868 return (ENOMEM);
869
870 #ifndef VTNET_LEGACY_TX
871 txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
872 M_NOWAIT, &txq->vtntx_mtx);
873 if (txq->vtntx_br == NULL)
874 return (ENOMEM);
875
876 TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
877 #endif
878 TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
879 txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
880 taskqueue_thread_enqueue, &txq->vtntx_tq);
881 if (txq->vtntx_tq == NULL)
882 return (ENOMEM);
883
884 return (0);
885 }
886
887 static int
888 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
889 {
890 int i, npairs, error;
891
892 npairs = sc->vtnet_max_vq_pairs;
893
894 sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
895 M_NOWAIT | M_ZERO);
896 sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
897 M_NOWAIT | M_ZERO);
898 if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
899 return (ENOMEM);
900
901 for (i = 0; i < npairs; i++) {
902 error = vtnet_init_rxq(sc, i);
903 if (error)
904 return (error);
905 error = vtnet_init_txq(sc, i);
906 if (error)
907 return (error);
908 }
909
910 vtnet_set_rx_process_limit(sc);
911 vtnet_setup_queue_sysctl(sc);
912
913 return (0);
914 }
915
916 static void
917 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
918 {
919
920 rxq->vtnrx_sc = NULL;
921 rxq->vtnrx_id = -1;
922
923 #if defined(INET) || defined(INET6)
924 tcp_lro_free(&rxq->vtnrx_lro);
925 #endif
926
927 if (rxq->vtnrx_sg != NULL) {
928 sglist_free(rxq->vtnrx_sg);
929 rxq->vtnrx_sg = NULL;
930 }
931
932 if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
933 mtx_destroy(&rxq->vtnrx_mtx);
934 }
935
936 static void
937 vtnet_destroy_txq(struct vtnet_txq *txq)
938 {
939
940 txq->vtntx_sc = NULL;
941 txq->vtntx_id = -1;
942
943 if (txq->vtntx_sg != NULL) {
944 sglist_free(txq->vtntx_sg);
945 txq->vtntx_sg = NULL;
946 }
947
948 #ifndef VTNET_LEGACY_TX
949 if (txq->vtntx_br != NULL) {
950 buf_ring_free(txq->vtntx_br, M_DEVBUF);
951 txq->vtntx_br = NULL;
952 }
953 #endif
954
955 if (mtx_initialized(&txq->vtntx_mtx) != 0)
956 mtx_destroy(&txq->vtntx_mtx);
957 }
958
959 static void
960 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
961 {
962 int i;
963
964 if (sc->vtnet_rxqs != NULL) {
965 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
966 vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
967 free(sc->vtnet_rxqs, M_DEVBUF);
968 sc->vtnet_rxqs = NULL;
969 }
970
971 if (sc->vtnet_txqs != NULL) {
972 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
973 vtnet_destroy_txq(&sc->vtnet_txqs[i]);
974 free(sc->vtnet_txqs, M_DEVBUF);
975 sc->vtnet_txqs = NULL;
976 }
977 }
978
979 static int
980 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
981 {
982
983 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
984 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
985 M_DEVBUF, M_NOWAIT | M_ZERO);
986 if (sc->vtnet_mac_filter == NULL)
987 return (ENOMEM);
988 }
989
990 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
991 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
992 VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
993 if (sc->vtnet_vlan_filter == NULL)
994 return (ENOMEM);
995 }
996
997 return (0);
998 }
999
1000 static void
1001 vtnet_free_rx_filters(struct vtnet_softc *sc)
1002 {
1003
1004 if (sc->vtnet_mac_filter != NULL) {
1005 free(sc->vtnet_mac_filter, M_DEVBUF);
1006 sc->vtnet_mac_filter = NULL;
1007 }
1008
1009 if (sc->vtnet_vlan_filter != NULL) {
1010 free(sc->vtnet_vlan_filter, M_DEVBUF);
1011 sc->vtnet_vlan_filter = NULL;
1012 }
1013 }
1014
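/*
 * Allocate the virtqueues: one RX/TX pair per supported queue pair plus an
 * optional control virtqueue. Pairs beyond the requested count are still
 * allocated, but without interrupt handlers and with minimal resources.
 */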
1015 static int
1016 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1017 {
1018 device_t dev;
1019 struct vq_alloc_info *info;
1020 struct vtnet_rxq *rxq;
1021 struct vtnet_txq *txq;
1022 int i, idx, flags, nvqs, error;
1023
1024 dev = sc->vtnet_dev;
1025 flags = 0;
1026
1027 nvqs = sc->vtnet_max_vq_pairs * 2;
1028 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1029 nvqs++;
1030
1031 info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1032 if (info == NULL)
1033 return (ENOMEM);
1034
1035 for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1036 rxq = &sc->vtnet_rxqs[i];
1037 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1038 vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1039 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1040
1041 txq = &sc->vtnet_txqs[i];
1042 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
1043 vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1044 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1045 }
1046
1047 /* These queues will not be used so allocate the minimum resources. */
1048 for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1049 rxq = &sc->vtnet_rxqs[i];
1050 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1051 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1052
1053 txq = &sc->vtnet_txqs[i];
1054 VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
1055 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1056 }
1057
1058 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1059 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1060 &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1061 }
1062
1063 /*
1064 * TODO: Enable interrupt binding if this is multiqueue. This will
1065 * only matter when per-virtqueue MSIX is available.
1066 */
1067 if (sc->vtnet_flags & VTNET_FLAG_MQ)
1068 flags |= 0;
1069
1070 error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
1071 free(info, M_TEMP);
1072
1073 return (error);
1074 }
1075
1076 static void
1077 vtnet_alloc_interface(struct vtnet_softc *sc)
1078 {
1079 device_t dev;
1080 if_t ifp;
1081
1082 dev = sc->vtnet_dev;
1083
1084 ifp = if_alloc(IFT_ETHER);
1085 sc->vtnet_ifp = ifp;
1086 if_setsoftc(ifp, sc);
1087 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1088 }
1089
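/*
 * Configure the ifnet: interface methods, capabilities derived from the
 * negotiated features, media, and VLAN event handlers, then attach it to
 * the Ethernet layer and register the pfil head.
 */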
1090 static int
1091 vtnet_setup_interface(struct vtnet_softc *sc)
1092 {
1093 device_t dev;
1094 struct pfil_head_args pa;
1095 if_t ifp;
1096
1097 dev = sc->vtnet_dev;
1098 ifp = sc->vtnet_ifp;
1099
1100 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1101 if_setbaudrate(ifp, IF_Gbps(10));
1102 if_setinitfn(ifp, vtnet_init);
1103 if_setioctlfn(ifp, vtnet_ioctl);
1104 if_setgetcounterfn(ifp, vtnet_get_counter);
1105 #ifndef VTNET_LEGACY_TX
1106 if_settransmitfn(ifp, vtnet_txq_mq_start);
1107 if_setqflushfn(ifp, vtnet_qflush);
1108 #else
1109 struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1110 if_setstartfn(ifp, vtnet_start);
1111 if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1112 if_setsendqready(ifp);
1113 #endif
1114
1115 vtnet_get_macaddr(sc);
1116
1117 if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1118 if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1119
1120 ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1121 ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1122 ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1123
1124 if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1125 int gso;
1126
1127 if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1128
1129 gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1130 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1131 if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1132 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1133 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1134 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1135 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1136
1137 if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1138 int tso_maxlen;
1139
1140 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1141
1142 tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1143 vtnet_tso_maxlen);
1144 if_sethwtsomax(ifp, tso_maxlen -
1145 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1146 if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1147 if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
1148 }
1149 }
1150
1151 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1152 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1153 #ifdef notyet
1154 /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1155 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1156 #endif
1157
1158 if (vtnet_tunable_int(sc, "fixup_needs_csum",
1159 vtnet_fixup_needs_csum) != 0)
1160 sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1161
1162 /* Support either "hardware" or software LRO. */
1163 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1164 }
1165
1166 if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1167 /*
1168 * VirtIO does not support VLAN tagging, but we can fake
1169 * it by inserting and removing the 802.1Q header during
1170 * transmit and receive. We are then able to do checksum
1171 * offloading of VLAN frames.
1172 */
1173 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1174 }
1175
1176 if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1177 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1178 if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1179
1180 /*
1181 * Capabilities after here are not enabled by default.
1182 */
1183 if_setcapenable(ifp, if_getcapabilities(ifp));
1184
1185 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1186 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1187
1188 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1189 vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1190 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1191 vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1192 }
1193
1194 ether_ifattach(ifp, sc->vtnet_hwaddr);
1195
1196 /* Tell the upper layer(s) we support long frames. */
1197 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1198
1199 DEBUGNET_SET(ifp, vtnet);
1200
1201 pa.pa_version = PFIL_VERSION;
1202 pa.pa_flags = PFIL_IN;
1203 pa.pa_type = PFIL_TYPE_ETHERNET;
1204 pa.pa_headname = if_name(ifp);
1205 sc->vtnet_pfil = pfil_head_register(&pa);
1206
1207 return (0);
1208 }
1209
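/*
 * Select the receive mbuf cluster size. Mergeable buffers and LRO without
 * mergeable buffers use fixed sizes; otherwise the size is scaled from the
 * MTU plus the headers that precede the payload.
 */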
1210 static int
1211 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1212 {
1213 int framesz;
1214
1215 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1216 return (MJUMPAGESIZE);
1217 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1218 return (MCLBYTES);
1219
1220 /*
1221 * Try to scale the receive mbuf cluster size from the MTU. We
1222 * could also use the VQ size to influence the selected size,
1223 * but that would only matter for very small queues.
1224 */
1225 if (vtnet_modern(sc)) {
1226 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1227 framesz = sizeof(struct virtio_net_hdr_v1);
1228 } else
1229 framesz = sizeof(struct vtnet_rx_header);
1230 framesz += sizeof(struct ether_vlan_header) + mtu;
1231 /*
1232 * Account for the offsetting we'll do elsewhere so we allocate the
1233 * right size for the mtu.
1234 */
1235 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1236 framesz += VTNET_ETHER_ALIGN;
1237 }
1238
1239 if (framesz <= MCLBYTES)
1240 return (MCLBYTES);
1241 else if (framesz <= MJUMPAGESIZE)
1242 return (MJUMPAGESIZE);
1243 else if (framesz <= MJUM9BYTES)
1244 return (MJUM9BYTES);
1245
1246 /* Sane default; avoid 16KB clusters. */
1247 return (MCLBYTES);
1248 }
1249
1250 static int
1251 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1252 {
1253 if_t ifp;
1254 int clustersz;
1255
1256 ifp = sc->vtnet_ifp;
1257 VTNET_CORE_LOCK_ASSERT(sc);
1258
1259 if (if_getmtu(ifp) == mtu)
1260 return (0);
1261 else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1262 return (EINVAL);
1263
1264 if_setmtu(ifp, mtu);
1265 clustersz = vtnet_rx_cluster_size(sc, mtu);
1266
1267 if (clustersz != sc->vtnet_rx_clustersz &&
1268 if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1269 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1270 vtnet_init_locked(sc, 0);
1271 }
1272
1273 return (0);
1274 }
1275
1276 static int
1277 vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1278 {
1279 if_t ifp;
1280 int drv_running;
1281
1282 ifp = sc->vtnet_ifp;
1283 drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1284
1285 VTNET_CORE_LOCK_ASSERT(sc);
1286
1287 if ((if_getflags(ifp) & IFF_UP) == 0) {
1288 if (drv_running)
1289 vtnet_stop(sc);
1290 goto out;
1291 }
1292
1293 if (!drv_running) {
1294 vtnet_init_locked(sc, 0);
1295 goto out;
1296 }
1297
1298 if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1299 (IFF_PROMISC | IFF_ALLMULTI)) {
1300 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1301 vtnet_rx_filter(sc);
1302 else {
1303 /*
1304 * We don't support filtering out multicast, so
1305 * ALLMULTI is always set.
1306 */
1307 if_setflagbits(ifp, IFF_ALLMULTI, 0);
1308 if_setflagbits(ifp, IFF_PROMISC, 0);
1309 }
1310 }
1311
1312 out:
1313 sc->vtnet_if_flags = if_getflags(ifp);
1314 return (0);
1315 }
1316
1317 static int
1318 vtnet_ioctl_multi(struct vtnet_softc *sc)
1319 {
1320 if_t ifp;
1321
1322 ifp = sc->vtnet_ifp;
1323
1324 VTNET_CORE_LOCK_ASSERT(sc);
1325
1326 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1327 if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1328 vtnet_rx_filter_mac(sc);
1329
1330 return (0);
1331 }
1332
1333 static int
1334 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1335 {
1336 if_t ifp;
1337 int mask, reinit, update;
1338
1339 ifp = sc->vtnet_ifp;
1340 mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1341 reinit = update = 0;
1342
1343 VTNET_CORE_LOCK_ASSERT(sc);
1344
1345 if (mask & IFCAP_TXCSUM)
1346 if_togglecapenable(ifp, IFCAP_TXCSUM);
1347 if (mask & IFCAP_TXCSUM_IPV6)
1348 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1349 if (mask & IFCAP_TSO4)
1350 if_togglecapenable(ifp, IFCAP_TSO4);
1351 if (mask & IFCAP_TSO6)
1352 if_togglecapenable(ifp, IFCAP_TSO6);
1353
1354 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1355 /*
1356 * These Rx features require the negotiated features to
1357 * be updated. Avoid a full reinit if possible.
1358 */
1359 if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1360 update = 1;
1361 else
1362 reinit = 1;
1363
1364 /* BMV: Avoid needless renegotiation for just software LRO. */
1365 if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1366 IFCAP_LRO && vtnet_software_lro(sc))
1367 reinit = update = 0;
1368
1369 if (mask & IFCAP_RXCSUM)
1370 if_togglecapenable(ifp, IFCAP_RXCSUM);
1371 if (mask & IFCAP_RXCSUM_IPV6)
1372 if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1373 if (mask & IFCAP_LRO)
1374 if_togglecapenable(ifp, IFCAP_LRO);
1375
1376 /*
1377 * VirtIO does not distinguish between IPv4 and IPv6 checksums
1378 * so treat them as a pair. Guest TSO (LRO) requires receive
1379 * checksums.
1380 */
1381 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1382 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
1383 #ifdef notyet
1384 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
1385 #endif
1386 } else
1387 if_setcapenablebit(ifp, 0,
1388 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO));
1389 }
1390
1391 if (mask & IFCAP_VLAN_HWFILTER) {
1392 /* These Rx features require renegotiation. */
1393 reinit = 1;
1394
1395 if (mask & IFCAP_VLAN_HWFILTER)
1396 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1397 }
1398
1399 if (mask & IFCAP_VLAN_HWTSO)
1400 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1401 if (mask & IFCAP_VLAN_HWTAGGING)
1402 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1403
1404 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1405 if (reinit) {
1406 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1407 vtnet_init_locked(sc, 0);
1408 } else if (update)
1409 vtnet_update_rx_offloads(sc);
1410 }
1411
1412 return (0);
1413 }
1414
1415 static int
1416 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1417 {
1418 struct vtnet_softc *sc;
1419 struct ifreq *ifr;
1420 int error;
1421
1422 sc = if_getsoftc(ifp);
1423 ifr = (struct ifreq *) data;
1424 error = 0;
1425
1426 switch (cmd) {
1427 case SIOCSIFMTU:
1428 VTNET_CORE_LOCK(sc);
1429 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1430 VTNET_CORE_UNLOCK(sc);
1431 break;
1432
1433 case SIOCSIFFLAGS:
1434 VTNET_CORE_LOCK(sc);
1435 error = vtnet_ioctl_ifflags(sc);
1436 VTNET_CORE_UNLOCK(sc);
1437 break;
1438
1439 case SIOCADDMULTI:
1440 case SIOCDELMULTI:
1441 VTNET_CORE_LOCK(sc);
1442 error = vtnet_ioctl_multi(sc);
1443 VTNET_CORE_UNLOCK(sc);
1444 break;
1445
1446 case SIOCSIFMEDIA:
1447 case SIOCGIFMEDIA:
1448 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1449 break;
1450
1451 case SIOCSIFCAP:
1452 VTNET_CORE_LOCK(sc);
1453 error = vtnet_ioctl_ifcap(sc, ifr);
1454 VTNET_CORE_UNLOCK(sc);
1455 VLAN_CAPABILITIES(ifp);
1456 break;
1457
1458 default:
1459 error = ether_ioctl(ifp, cmd, data);
1460 break;
1461 }
1462
1463 VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1464
1465 return (error);
1466 }
1467
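/*
 * Fill the receive virtqueue with mbufs until it is full (or an allocation
 * fails), then notify the host if anything was enqueued.
 */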
1468 static int
1469 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1470 {
1471 struct virtqueue *vq;
1472 int nbufs, error;
1473
1474 #ifdef DEV_NETMAP
1475 error = vtnet_netmap_rxq_populate(rxq);
1476 if (error >= 0)
1477 return (error);
1478 #endif /* DEV_NETMAP */
1479
1480 vq = rxq->vtnrx_vq;
1481 error = ENOSPC;
1482
1483 for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1484 error = vtnet_rxq_new_buf(rxq);
1485 if (error)
1486 break;
1487 }
1488
1489 if (nbufs > 0) {
1490 virtqueue_notify(vq);
1491 /*
1492 * EMSGSIZE signifies the virtqueue did not have enough
1493 * entries available to hold the last mbuf. This is not
1494 * an error.
1495 */
1496 if (error == EMSGSIZE)
1497 error = 0;
1498 }
1499
1500 return (error);
1501 }
1502
1503 static void
1504 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1505 {
1506 struct virtqueue *vq;
1507 struct mbuf *m;
1508 int last;
1509 #ifdef DEV_NETMAP
1510 struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1511 rxq->vtnrx_id, NR_RX);
1512 #else /* !DEV_NETMAP */
1513 void *kring = NULL;
1514 #endif /* !DEV_NETMAP */
1515
1516 vq = rxq->vtnrx_vq;
1517 last = 0;
1518
1519 while ((m = virtqueue_drain(vq, &last)) != NULL) {
1520 if (kring == NULL)
1521 m_freem(m);
1522 }
1523
1524 KASSERT(virtqueue_empty(vq),
1525 ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1526 }
1527
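/*
 * Allocate a receive mbuf of the configured cluster size, or an mbuf chain
 * when LRO without mergeable buffers is negotiated, applying the alignment
 * offset when required.
 */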
1528 static struct mbuf *
1529 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1530 {
1531 struct mbuf *m_head, *m_tail, *m;
1532 int i, size;
1533
1534 m_head = NULL;
1535 size = sc->vtnet_rx_clustersz;
1536
1537 KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1538 ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1539
1540 for (i = 0; i < nbufs; i++) {
1541 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1542 if (m == NULL) {
1543 sc->vtnet_stats.mbuf_alloc_failed++;
1544 m_freem(m_head);
1545 return (NULL);
1546 }
1547
1548 m->m_len = size;
1549 /*
1550 * Need to offset the mbuf if the header we're going to add
1551 * will misalign.
1552 */
1553 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1554 m_adj(m, VTNET_ETHER_ALIGN);
1555 }
1556 if (m_head != NULL) {
1557 m_tail->m_next = m;
1558 m_tail = m;
1559 } else
1560 m_head = m_tail = m;
1561 }
1562
1563 if (m_tailp != NULL)
1564 *m_tailp = m_tail;
1565
1566 return (m_head);
1567 }
1568
1569 /*
1570 * Slow path for when LRO without mergeable buffers is negotiated.
1571 */
1572 static int
1573 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1574 int len0)
1575 {
1576 struct vtnet_softc *sc;
1577 struct mbuf *m, *m_prev, *m_new, *m_tail;
1578 int len, clustersz, nreplace, error;
1579
1580 sc = rxq->vtnrx_sc;
1581 clustersz = sc->vtnet_rx_clustersz;
1582 /*
1583 * Need to offset the mbuf if the header we're going to add will
1584 * misalign; account for that here.
1585 */
1586 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1587 clustersz -= VTNET_ETHER_ALIGN;
1588
1589 m_prev = NULL;
1590 m_tail = NULL;
1591 nreplace = 0;
1592
1593 m = m0;
1594 len = len0;
1595
1596 /*
1597 * Since these mbuf chains are so large, avoid allocating a complete
1598 * replacement when the received frame did not consume the entire
1599 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1600 */
1601 while (len > 0) {
1602 if (m == NULL) {
1603 sc->vtnet_stats.rx_frame_too_large++;
1604 return (EMSGSIZE);
1605 }
1606
1607 /*
1608 * Every mbuf should have the expected cluster size since that
1609 * is also used to allocate the replacements.
1610 */
1611 KASSERT(m->m_len == clustersz,
1612 ("%s: mbuf size %d not expected cluster size %d", __func__,
1613 m->m_len, clustersz));
1614
1615 m->m_len = MIN(m->m_len, len);
1616 len -= m->m_len;
1617
1618 m_prev = m;
1619 m = m->m_next;
1620 nreplace++;
1621 }
1622
1623 KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1624 ("%s: invalid replacement mbuf count %d max %d", __func__,
1625 nreplace, sc->vtnet_rx_nmbufs));
1626
1627 m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1628 if (m_new == NULL) {
1629 m_prev->m_len = clustersz;
1630 return (ENOBUFS);
1631 }
1632
1633 /*
1634 * Move any unused mbufs from the received mbuf chain onto the
1635 * end of the replacement chain.
1636 */
1637 if (m_prev->m_next != NULL) {
1638 m_tail->m_next = m_prev->m_next;
1639 m_prev->m_next = NULL;
1640 }
1641
1642 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1643 if (error) {
1644 /*
1645 * The replacement is supposed to be a copy of the one
1646 * dequeued, so this is a very unexpected error.
1647 *
1648 * Restore the m0 chain to the original state if it was
1649 * modified so we can then discard it.
1650 */
1651 if (m_tail->m_next != NULL) {
1652 m_prev->m_next = m_tail->m_next;
1653 m_tail->m_next = NULL;
1654 }
1655 m_prev->m_len = clustersz;
1656 sc->vtnet_stats.rx_enq_replacement_failed++;
1657 m_freem(m_new);
1658 }
1659
1660 return (error);
1661 }
1662
1663 static int
1664 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1665 {
1666 struct vtnet_softc *sc;
1667 struct mbuf *m_new;
1668 int error;
1669
1670 sc = rxq->vtnrx_sc;
1671
1672 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1673 return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1674
1675 MPASS(m->m_next == NULL);
1676 if (m->m_len < len)
1677 return (EMSGSIZE);
1678
1679 m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1680 if (m_new == NULL)
1681 return (ENOBUFS);
1682
1683 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1684 if (error) {
1685 sc->vtnet_stats.rx_enq_replacement_failed++;
1686 m_freem(m_new);
1687 } else
1688 m->m_len = len;
1689
1690 return (error);
1691 }
1692
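/*
 * Enqueue a receive mbuf (chain) onto the virtqueue. For modern devices and
 * mergeable buffers the header is inlined in the mbuf data; otherwise the
 * legacy header is added as a separate scatter/gather segment.
 */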
1693 static int
1694 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1695 {
1696 struct vtnet_softc *sc;
1697 struct sglist *sg;
1698 int header_inlined, error;
1699
1700 sc = rxq->vtnrx_sc;
1701 sg = rxq->vtnrx_sg;
1702
1703 KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1704 ("%s: mbuf chain without LRO_NOMRG", __func__));
1705 VTNET_RXQ_LOCK_ASSERT(rxq);
1706
1707 sglist_reset(sg);
1708 header_inlined = vtnet_modern(sc) ||
1709 (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1710
1711 /*
1712 * Note: The mbuf has already been adjusted when we allocate it if we
1713 * have to do strict alignment.
1714 */
1715 if (header_inlined)
1716 error = sglist_append_mbuf(sg, m);
1717 else {
1718 struct vtnet_rx_header *rxhdr =
1719 mtod(m, struct vtnet_rx_header *);
1720 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1721
1722 /* Append the header and remaining mbuf data. */
1723 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1724 if (error)
1725 return (error);
1726 error = sglist_append(sg, &rxhdr[1],
1727 m->m_len - sizeof(struct vtnet_rx_header));
1728 if (error)
1729 return (error);
1730
1731 if (m->m_next != NULL)
1732 error = sglist_append_mbuf(sg, m->m_next);
1733 }
1734
1735 if (error)
1736 return (error);
1737
1738 return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1739 }
1740
1741 static int
1742 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1743 {
1744 struct vtnet_softc *sc;
1745 struct mbuf *m;
1746 int error;
1747
1748 sc = rxq->vtnrx_sc;
1749
1750 m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1751 if (m == NULL)
1752 return (ENOBUFS);
1753
1754 error = vtnet_rxq_enqueue_buf(rxq, m);
1755 if (error)
1756 m_freem(m);
1757
1758 return (error);
1759 }
1760
1761 static int
1762 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
1763 int hoff, struct virtio_net_hdr *hdr)
1764 {
1765 struct vtnet_softc *sc;
1766 int error;
1767
1768 sc = rxq->vtnrx_sc;
1769
1770 /*
1771 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
1772 * not have an analogous CSUM flag. The checksum has been validated,
1773 * but is incomplete (TCP/UDP pseudo header).
1774 *
1775 * The packet is likely from another VM on the same host that itself
1776 * performed checksum offloading so Tx/Rx is basically a memcpy and
1777 * the checksum has little value.
1778 *
1779 * Default to receiving the packet as-is for performance reasons, but
1780 * this can cause issues if the packet is to be forwarded because it
1781 * does not contain a valid checksum. This patch may be helpful:
1782 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
1783 * compute the checksum if requested.
1784 *
1785 * BMV: Need to add a CSUM_PARTIAL flag?
1786 */
1787 if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1788 error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
1789 return (error);
1790 }
1791
1792 /*
1793 * Compute the checksum in the driver so the packet will contain a
1794 * valid checksum. The checksum is at csum_offset from csum_start.
1795 */
1796 switch (etype) {
1797 #if defined(INET) || defined(INET6)
1798 case ETHERTYPE_IP:
1799 case ETHERTYPE_IPV6: {
1800 int csum_off, csum_end;
1801 uint16_t csum;
1802
1803 csum_off = hdr->csum_start + hdr->csum_offset;
1804 csum_end = csum_off + sizeof(uint16_t);
1805
1806 /* Assume checksum will be in the first mbuf. */
1807 if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
1808 return (1);
1809
1810 /*
1811 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1812 * checksum and write it at the specified offset. We could
1813 * try to verify the packet: csum_start should probably
1814 * correspond to the start of the TCP/UDP header.
1815 *
1816 * BMV: Need to properly handle UDP with zero checksum. Is
1817 * the IPv4 header checksum implicitly validated?
1818 */
1819 csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1820 *(uint16_t *)(mtodo(m, csum_off)) = csum;
1821 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1822 m->m_pkthdr.csum_data = 0xFFFF;
1823 break;
1824 }
1825 #endif
1826 default:
1827 sc->vtnet_stats.rx_csum_bad_ethtype++;
1828 return (1);
1829 }
1830
1831 return (0);
1832 }
1833
1834 static int
1835 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
1836 uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
1837 {
1838 #if 0
1839 struct vtnet_softc *sc;
1840 #endif
1841 int protocol;
1842
1843 #if 0
1844 sc = rxq->vtnrx_sc;
1845 #endif
1846
1847 switch (etype) {
1848 #if defined(INET)
1849 case ETHERTYPE_IP:
1850 if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
1851 protocol = IPPROTO_DONE;
1852 else {
1853 struct ip *ip = (struct ip *)(m->m_data + hoff);
1854 protocol = ip->ip_p;
1855 }
1856 break;
1857 #endif
1858 #if defined(INET6)
1859 case ETHERTYPE_IPV6:
1860 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1861 || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
1862 protocol = IPPROTO_DONE;
1863 break;
1864 #endif
1865 default:
1866 protocol = IPPROTO_DONE;
1867 break;
1868 }
1869
1870 switch (protocol) {
1871 case IPPROTO_TCP:
1872 case IPPROTO_UDP:
1873 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1874 m->m_pkthdr.csum_data = 0xFFFF;
1875 break;
1876 default:
1877 /*
1878 * FreeBSD does not support checksum offloading of this
1879 * protocol. Let the stack re-verify the checksum later
1880 * if the protocol is supported.
1881 */
1882 #if 0
1883 if_printf(sc->vtnet_ifp,
1884 "%s: checksum offload of unsupported protocol "
1885 "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
1886 __func__, etype, protocol, hdr->csum_start,
1887 hdr->csum_offset);
1888 #endif
1889 break;
1890 }
1891
1892 return (0);
1893 }
1894
1895 static int
1896 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1897 struct virtio_net_hdr *hdr)
1898 {
1899 const struct ether_header *eh;
1900 int hoff;
1901 uint16_t etype;
1902
1903 eh = mtod(m, const struct ether_header *);
1904 etype = ntohs(eh->ether_type);
1905 if (etype == ETHERTYPE_VLAN) {
1906 /* TODO BMV: Handle QinQ. */
1907 const struct ether_vlan_header *evh =
1908 mtod(m, const struct ether_vlan_header *);
1909 etype = ntohs(evh->evl_proto);
1910 hoff = sizeof(struct ether_vlan_header);
1911 } else
1912 hoff = sizeof(struct ether_header);
1913
1914 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1915 return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
1916 else /* VIRTIO_NET_HDR_F_DATA_VALID */
1917 return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
1918 }
1919
1920 static void
1921 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1922 {
1923 struct mbuf *m;
1924
1925 while (--nbufs > 0) {
1926 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1927 if (m == NULL)
1928 break;
1929 vtnet_rxq_discard_buf(rxq, m);
1930 }
1931 }
1932
1933 static void
1934 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1935 {
1936 int error __diagused;
1937
1938 /*
1939 * Requeue the discarded mbuf. This should always be successful
1940 * since it was just dequeued.
1941 */
1942 error = vtnet_rxq_enqueue_buf(rxq, m);
1943 KASSERT(error == 0,
1944 ("%s: cannot requeue discarded mbuf %d", __func__, error));
1945 }
1946
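/*
 * Dequeue the remaining 'nbufs - 1' descriptors of a mergeable receive
 * chain and append each mbuf to m_head, accumulating the packet length.
 * Each dequeued slot is refilled with a new buffer; on any failure the
 * partially assembled chain is freed and rx_mergeable_failed is bumped.
 */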
1947 static int
1948 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1949 {
1950 struct vtnet_softc *sc;
1951 struct virtqueue *vq;
1952 struct mbuf *m_tail;
1953
1954 sc = rxq->vtnrx_sc;
1955 vq = rxq->vtnrx_vq;
1956 m_tail = m_head;
1957
1958 while (--nbufs > 0) {
1959 struct mbuf *m;
1960 uint32_t len;
1961
1962 m = virtqueue_dequeue(vq, &len);
1963 if (m == NULL) {
1964 rxq->vtnrx_stats.vrxs_ierrors++;
1965 goto fail;
1966 }
1967
1968 if (vtnet_rxq_new_buf(rxq) != 0) {
1969 rxq->vtnrx_stats.vrxs_iqdrops++;
1970 vtnet_rxq_discard_buf(rxq, m);
1971 if (nbufs > 1)
1972 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1973 goto fail;
1974 }
1975
1976 if (m->m_len < len)
1977 len = m->m_len;
1978
1979 m->m_len = len;
1980 m->m_flags &= ~M_PKTHDR;
1981
1982 m_head->m_pkthdr.len += len;
1983 m_tail->m_next = m;
1984 m_tail = m;
1985 }
1986
1987 return (0);
1988
1989 fail:
1990 sc->vtnet_stats.rx_mergeable_failed++;
1991 m_freem(m_head);
1992
1993 return (1);
1994 }
1995
1996 #if defined(INET) || defined(INET6)
1997 static int
1998 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
1999 {
2000 struct lro_ctrl *lro;
2001
2002 lro = &rxq->vtnrx_lro;
2003
2004 if (lro->lro_mbuf_max != 0) {
2005 tcp_lro_queue_mbuf(lro, m);
2006 return (0);
2007 }
2008
2009 return (tcp_lro_rx(lro, m, 0));
2010 }
2011 #endif
2012
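/*
 * Finish receive processing for a fully assembled packet: strip the VLAN
 * tag when hardware tagging is enabled (adjusting csum_start to match),
 * record the queue id as the flow id, apply the host-provided checksum
 * and GSO information to the mbuf, and hand the packet to software LRO
 * or directly to the interface input routine.
 */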
2013 static void
2014 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2015 struct virtio_net_hdr *hdr)
2016 {
2017 struct vtnet_softc *sc;
2018 if_t ifp;
2019
2020 sc = rxq->vtnrx_sc;
2021 ifp = sc->vtnet_ifp;
2022
2023 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2024 struct ether_header *eh = mtod(m, struct ether_header *);
2025 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2026 vtnet_vlan_tag_remove(m);
2027 /*
2028 * With the 802.1Q header removed, update the
2029 * checksum starting location accordingly.
2030 */
2031 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2032 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2033 }
2034 }
2035
2036 m->m_pkthdr.flowid = rxq->vtnrx_id;
2037 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2038
2039 if (hdr->flags &
2040 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2041 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2042 rxq->vtnrx_stats.vrxs_csum++;
2043 else
2044 rxq->vtnrx_stats.vrxs_csum_failed++;
2045 }
2046
2047 if (hdr->gso_size != 0) {
2048 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2049 case VIRTIO_NET_HDR_GSO_TCPV4:
2050 case VIRTIO_NET_HDR_GSO_TCPV6:
2051 m->m_pkthdr.lro_nsegs =
2052 howmany(m->m_pkthdr.len, hdr->gso_size);
2053 rxq->vtnrx_stats.vrxs_host_lro++;
2054 break;
2055 }
2056 }
2057
2058 rxq->vtnrx_stats.vrxs_ipackets++;
2059 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2060
2061 #if defined(INET) || defined(INET6)
2062 if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2063 if (vtnet_lro_rx(rxq, m) == 0)
2064 return;
2065 }
2066 #endif
2067
2068 if_input(ifp, m);
2069 }
2070
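/*
 * Main receive loop: dequeue up to vtnet_rx_process_limit completed
 * buffers, post replacements, reassemble mergeable chains, save the
 * virtio_net header (endian converted) before stripping it with m_adj(),
 * run the pfil input hooks, and pass each packet to vtnet_rxq_input().
 * Returns EAGAIN when the processing limit was reached with work still
 * pending.
 */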
2071 static int
2072 vtnet_rxq_eof(struct vtnet_rxq *rxq)
2073 {
2074 struct virtio_net_hdr lhdr, *hdr;
2075 struct vtnet_softc *sc;
2076 if_t ifp;
2077 struct virtqueue *vq;
2078 int deq, count;
2079
2080 sc = rxq->vtnrx_sc;
2081 vq = rxq->vtnrx_vq;
2082 ifp = sc->vtnet_ifp;
2083 deq = 0;
2084 count = sc->vtnet_rx_process_limit;
2085
2086 VTNET_RXQ_LOCK_ASSERT(rxq);
2087
2088 CURVNET_SET_QUIET(if_getvnet(ifp));
2089 while (count-- > 0) {
2090 struct mbuf *m;
2091 uint32_t len, nbufs, adjsz;
2092
2093 m = virtqueue_dequeue(vq, &len);
2094 if (m == NULL)
2095 break;
2096 deq++;
2097
2098 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2099 rxq->vtnrx_stats.vrxs_ierrors++;
2100 vtnet_rxq_discard_buf(rxq, m);
2101 continue;
2102 }
2103
2104 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2105 struct virtio_net_hdr_mrg_rxbuf *mhdr =
2106 mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2107 kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
2108 nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2109 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2110 } else if (vtnet_modern(sc)) {
2111 nbufs = 1; /* num_buffers is always 1 */
2112 adjsz = sizeof(struct virtio_net_hdr_v1);
2113 } else {
2114 nbufs = 1;
2115 adjsz = sizeof(struct vtnet_rx_header);
2116 /*
2117 * Account for our gap between the header and start of
2118 * data to keep the segments separated.
2119 */
2120 len += VTNET_RX_HEADER_PAD;
2121 }
2122
2123 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2124 rxq->vtnrx_stats.vrxs_iqdrops++;
2125 vtnet_rxq_discard_buf(rxq, m);
2126 if (nbufs > 1)
2127 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2128 continue;
2129 }
2130
2131 m->m_pkthdr.len = len;
2132 m->m_pkthdr.rcvif = ifp;
2133 m->m_pkthdr.csum_flags = 0;
2134
2135 if (nbufs > 1) {
2136 /* Dequeue the rest of the chain. */
2137 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2138 continue;
2139 }
2140
2141 kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
2142
2143 /*
2144 * Save an endian swapped version of the header prior to it
2145 * being stripped. The header is always at the start of the
2146  * mbuf data. num_buffers was already read above (and is not needed
2147  * here), so use the standard header layout.
2148 */
2149 hdr = mtod(m, struct virtio_net_hdr *);
2150 lhdr.flags = hdr->flags;
2151 lhdr.gso_type = hdr->gso_type;
2152 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2153 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2154 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2155 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2156 m_adj(m, adjsz);
2157
2158 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2159 pfil_return_t pfil;
2160
2161 pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
2162 switch (pfil) {
2163 case PFIL_DROPPED:
2164 case PFIL_CONSUMED:
2165 continue;
2166 default:
2167 KASSERT(pfil == PFIL_PASS,
2168 ("Filter returned %d!", pfil));
2169 }
2170 }
2171
2172 vtnet_rxq_input(rxq, m, &lhdr);
2173 }
2174
2175 if (deq > 0) {
2176 #if defined(INET) || defined(INET6)
2177 if (vtnet_software_lro(sc))
2178 tcp_lro_flush_all(&rxq->vtnrx_lro);
2179 #endif
2180 virtqueue_notify(vq);
2181 }
2182 CURVNET_RESTORE();
2183
2184 return (count > 0 ? 0 : EAGAIN);
2185 }
2186
2187 static void
2188 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2189 {
2190 struct vtnet_softc *sc;
2191 if_t ifp;
2192 u_int more;
2193 #ifdef DEV_NETMAP
2194 int nmirq;
2195 #endif /* DEV_NETMAP */
2196
2197 sc = rxq->vtnrx_sc;
2198 ifp = sc->vtnet_ifp;
2199
2200 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2201 /*
2202 * Ignore this interrupt. Either this is a spurious interrupt
2203 * or multiqueue without per-VQ MSIX so every queue needs to
2204 * be polled (a brain dead configuration we could try harder
2205 * to avoid).
2206 */
2207 vtnet_rxq_disable_intr(rxq);
2208 return;
2209 }
2210
2211 VTNET_RXQ_LOCK(rxq);
2212
2213 #ifdef DEV_NETMAP
2214 /*
2215 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2216 * This is not necessary to serialize the access to the RX vq, but
2217 * rather to avoid races that may happen if this interface is
2218 * attached to a VALE switch, which would cause received packets
2219 * to stall in the RX queue (nm_kr_tryget() could find the kring
2220 * busy when called from netmap_bwrap_intr_notify()).
2221 */
2222 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2223 if (nmirq != NM_IRQ_PASS) {
2224 VTNET_RXQ_UNLOCK(rxq);
2225 if (nmirq == NM_IRQ_RESCHED) {
2226 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2227 }
2228 return;
2229 }
2230 #endif /* DEV_NETMAP */
2231
2232 again:
2233 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2234 VTNET_RXQ_UNLOCK(rxq);
2235 return;
2236 }
2237
2238 more = vtnet_rxq_eof(rxq);
2239 if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2240 if (!more)
2241 vtnet_rxq_disable_intr(rxq);
2242 /*
2243 * This is an occasional condition or race (when !more),
2244 * so retry a few times before scheduling the taskqueue.
2245 */
2246 if (tries-- > 0)
2247 goto again;
2248
2249 rxq->vtnrx_stats.vrxs_rescheduled++;
2250 VTNET_RXQ_UNLOCK(rxq);
2251 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2252 } else
2253 VTNET_RXQ_UNLOCK(rxq);
2254 }
2255
2256 static void
2257 vtnet_rx_vq_intr(void *xrxq)
2258 {
2259 struct vtnet_rxq *rxq;
2260
2261 rxq = xrxq;
2262 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2263 }
2264
2265 static void
2266 vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2267 {
2268 struct vtnet_rxq *rxq;
2269
2270 rxq = xrxq;
2271 vtnet_rx_vq_process(rxq, 0);
2272 }
2273
2274 static int
2275 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2276 {
2277 struct vtnet_softc *sc;
2278 int threshold;
2279
2280 sc = txq->vtntx_sc;
2281
2282 /*
2283 * The Tx interrupt is disabled until the queue free count falls
2284 * below our threshold. Completed frames are drained from the Tx
2285 * virtqueue before transmitting new frames and in the watchdog
2286 * callout, so the frequency of Tx interrupts is greatly reduced,
2287 * at the cost of not freeing mbufs as quickly as they otherwise
2288 * would be.
2289 */
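	/*
	 * For example, a 256 entry virtqueue yields a threshold of 64
	 * free descriptors.
	 */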
2290 threshold = virtqueue_size(txq->vtntx_vq) / 4;
2291
2292 /*
2293  * Without indirect descriptors, leave enough room for the maximum
2294  * number of segments we handle.
2295 */
2296 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2297 threshold < sc->vtnet_tx_nsegs)
2298 threshold = sc->vtnet_tx_nsegs;
2299
2300 return (threshold);
2301 }
2302
2303 static int
2304 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2305 {
2306 struct virtqueue *vq;
2307
2308 vq = txq->vtntx_vq;
2309
2310 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2311 }
2312
2313 static int
2314 vtnet_txq_notify(struct vtnet_txq *txq)
2315 {
2316 struct virtqueue *vq;
2317
2318 vq = txq->vtntx_vq;
2319
2320 txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2321 virtqueue_notify(vq);
2322
2323 if (vtnet_txq_enable_intr(txq) == 0)
2324 return (0);
2325
2326 /*
2327 * Drain frames that were completed since last checked. If this
2328 * causes the queue to go above the threshold, the caller should
2329 * continue transmitting.
2330 */
2331 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2332 virtqueue_disable_intr(vq);
2333 return (1);
2334 }
2335
2336 return (0);
2337 }
2338
2339 static void
2340 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2341 {
2342 struct virtqueue *vq;
2343 struct vtnet_tx_header *txhdr;
2344 int last;
2345 #ifdef DEV_NETMAP
2346 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2347 txq->vtntx_id, NR_TX);
2348 #else /* !DEV_NETMAP */
2349 void *kring = NULL;
2350 #endif /* !DEV_NETMAP */
2351
2352 vq = txq->vtntx_vq;
2353 last = 0;
2354
2355 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2356 if (kring == NULL) {
2357 m_freem(txhdr->vth_mbuf);
2358 uma_zfree(vtnet_tx_header_zone, txhdr);
2359 }
2360 }
2361
2362 KASSERT(virtqueue_empty(vq),
2363 ("%s: mbufs remaining in tx queue %p", __func__, txq));
2364 }
2365
2366 /*
2367 * BMV: This can go away once we finally have offsets in the mbuf header.
2368 */
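/*
 * Parse the Ethernet (and optional VLAN) header of an outbound packet to
 * determine the ethertype, the IP protocol, and the offset at which the
 * transport header begins. That offset becomes the checksum start passed
 * to the host.
 */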
2369 static int
2370 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2371 int *proto, int *start)
2372 {
2373 struct vtnet_softc *sc;
2374 struct ether_vlan_header *evh;
2375 #if defined(INET) || defined(INET6)
2376 int offset;
2377 #endif
2378
2379 sc = txq->vtntx_sc;
2380
2381 evh = mtod(m, struct ether_vlan_header *);
2382 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2383 /* BMV: We should handle nested VLAN tags too. */
2384 *etype = ntohs(evh->evl_proto);
2385 #if defined(INET) || defined(INET6)
2386 offset = sizeof(struct ether_vlan_header);
2387 #endif
2388 } else {
2389 *etype = ntohs(evh->evl_encap_proto);
2390 #if defined(INET) || defined(INET6)
2391 offset = sizeof(struct ether_header);
2392 #endif
2393 }
2394
2395 switch (*etype) {
2396 #if defined(INET)
2397 case ETHERTYPE_IP: {
2398 struct ip *ip, iphdr;
2399 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2400 m_copydata(m, offset, sizeof(struct ip),
2401 (caddr_t) &iphdr);
2402 ip = &iphdr;
2403 } else
2404 ip = (struct ip *)(m->m_data + offset);
2405 *proto = ip->ip_p;
2406 *start = offset + (ip->ip_hl << 2);
2407 break;
2408 }
2409 #endif
2410 #if defined(INET6)
2411 case ETHERTYPE_IPV6:
2412 *proto = -1;
2413 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2414 /* Assert the network stack sent us a valid packet. */
2415 KASSERT(*start > offset,
2416 ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2417 *start, offset, *proto));
2418 break;
2419 #endif
2420 default:
2421 sc->vtnet_stats.tx_csum_unknown_ethtype++;
2422 return (EINVAL);
2423 }
2424
2425 return (0);
2426 }
2427
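/*
 * Fill in the TSO fields of the virtio_net header: hdr_len covers the
 * link, network, and TCP headers, gso_size is the TCP segment size, and
 * gso_type selects TCPv4 or TCPv6. CWR is only passed through when the
 * host negotiated VIRTIO_NET_F_HOST_ECN; otherwise the packet is dropped.
 */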
2428 static int
2429 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2430 int offset, struct virtio_net_hdr *hdr)
2431 {
2432 static struct timeval lastecn;
2433 static int curecn;
2434 struct vtnet_softc *sc;
2435 struct tcphdr *tcp, tcphdr;
2436
2437 sc = txq->vtntx_sc;
2438
2439 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2440 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2441 tcp = &tcphdr;
2442 } else
2443 tcp = (struct tcphdr *)(m->m_data + offset);
2444
2445 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2446 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2447 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2448 VIRTIO_NET_HDR_GSO_TCPV6;
2449
2450 if (__predict_false(tcp->th_flags & TH_CWR)) {
2451 /*
2452 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2453 * FreeBSD, ECN support is not on a per-interface basis,
2454 * but globally via the net.inet.tcp.ecn.enable sysctl
2455 * knob. The default is off.
2456 */
2457 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2458 if (ppsratecheck(&lastecn, &curecn, 1))
2459 if_printf(sc->vtnet_ifp,
2460 "TSO with ECN not negotiated with host\n");
2461 return (ENOTSUP);
2462 }
2463 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2464 }
2465
2466 txq->vtntx_stats.vtxs_tso++;
2467
2468 return (0);
2469 }
2470
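/*
 * Translate the mbuf checksum and TSO flags into the virtio_net header
 * consumed by the host. The mbuf is freed and NULL returned if the
 * parsed packet does not match the requested offloads.
 */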
2471 static struct mbuf *
2472 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2473 struct virtio_net_hdr *hdr)
2474 {
2475 struct vtnet_softc *sc;
2476 int flags, etype, csum_start, proto, error;
2477
2478 sc = txq->vtntx_sc;
2479 flags = m->m_pkthdr.csum_flags;
2480
2481 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2482 if (error)
2483 goto drop;
2484
2485 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2486 /* Sanity check the parsed mbuf matches the offload flags. */
2487 if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2488 etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2489 && etype != ETHERTYPE_IPV6))) {
2490 sc->vtnet_stats.tx_csum_proto_mismatch++;
2491 goto drop;
2492 }
2493
2494 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2495 hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2496 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2497 txq->vtntx_stats.vtxs_csum++;
2498 }
2499
2500 if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2501 /*
2502  * Sanity check that the parsed mbuf IP protocol is TCP, since
2503  * VirtIO TSO requires the checksum offloading above.
2504 */
2505 if (__predict_false(proto != IPPROTO_TCP)) {
2506 sc->vtnet_stats.tx_tso_not_tcp++;
2507 goto drop;
2508 } else if (__predict_false((hdr->flags &
2509 VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2510 sc->vtnet_stats.tx_tso_without_csum++;
2511 goto drop;
2512 }
2513
2514 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2515 if (error)
2516 goto drop;
2517 }
2518
2519 return (m);
2520
2521 drop:
2522 m_freem(m);
2523 return (NULL);
2524 }
2525
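/*
 * Build the transmit scatter/gather list with the virtio_net header as
 * the first segment followed by the mbuf chain, defragmenting the chain
 * once with m_defrag() if it has too many segments, and enqueue it with
 * the vtnet_tx_header as the cookie.
 */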
2526 static int
2527 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2528 struct vtnet_tx_header *txhdr)
2529 {
2530 struct vtnet_softc *sc;
2531 struct virtqueue *vq;
2532 struct sglist *sg;
2533 struct mbuf *m;
2534 int error;
2535
2536 sc = txq->vtntx_sc;
2537 vq = txq->vtntx_vq;
2538 sg = txq->vtntx_sg;
2539 m = *m_head;
2540
2541 sglist_reset(sg);
2542 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2543 if (error != 0 || sg->sg_nseg != 1) {
2544 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2545 __func__, error, sg->sg_nseg));
2546 goto fail;
2547 }
2548
2549 error = sglist_append_mbuf(sg, m);
2550 if (error) {
2551 m = m_defrag(m, M_NOWAIT);
2552 if (m == NULL)
2553 goto fail;
2554
2555 *m_head = m;
2556 sc->vtnet_stats.tx_defragged++;
2557
2558 error = sglist_append_mbuf(sg, m);
2559 if (error)
2560 goto fail;
2561 }
2562
2563 txhdr->vth_mbuf = m;
2564 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2565
2566 return (error);
2567
2568 fail:
2569 sc->vtnet_stats.tx_defrag_failed++;
2570 m_freem(*m_head);
2571 *m_head = NULL;
2572
2573 return (ENOBUFS);
2574 }
2575
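/*
 * Encapsulate a packet for transmission: allocate a vtnet_tx_header,
 * software-insert the VLAN tag if one is pending (presumably because the
 * virtio header carries no VLAN tag field), apply any checksum/TSO
 * offloads, and enqueue the result. On failure the mbuf is freed and
 * *m_head is set to NULL.
 */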
2576 static int
2577 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2578 {
2579 struct vtnet_tx_header *txhdr;
2580 struct virtio_net_hdr *hdr;
2581 struct mbuf *m;
2582 int error;
2583
2584 m = *m_head;
2585 M_ASSERTPKTHDR(m);
2586
2587 txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2588 if (txhdr == NULL) {
2589 m_freem(m);
2590 *m_head = NULL;
2591 return (ENOMEM);
2592 }
2593
2594 /*
2595  * Always use the non-mergeable header, regardless of whether mergeable
2596  * headers were negotiated, because for transmit num_buffers is always zero.
2597 * The vtnet_hdr_size is used to enqueue the right header size segment.
2598 */
2599 hdr = &txhdr->vth_uhdr.hdr;
2600
2601 if (m->m_flags & M_VLANTAG) {
2602 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2603 if ((*m_head = m) == NULL) {
2604 error = ENOBUFS;
2605 goto fail;
2606 }
2607 m->m_flags &= ~M_VLANTAG;
2608 }
2609
2610 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2611 m = vtnet_txq_offload(txq, m, hdr);
2612 if ((*m_head = m) == NULL) {
2613 error = ENOBUFS;
2614 goto fail;
2615 }
2616 }
2617
2618 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2619 fail:
2620 if (error)
2621 uma_zfree(vtnet_tx_header_zone, txhdr);
2622
2623 return (error);
2624 }
2625
2626 #ifdef VTNET_LEGACY_TX
2627
2628 static void
2629 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2630 {
2631 struct vtnet_softc *sc;
2632 struct virtqueue *vq;
2633 struct mbuf *m0;
2634 int tries, enq;
2635
2636 sc = txq->vtntx_sc;
2637 vq = txq->vtntx_vq;
2638 tries = 0;
2639
2640 VTNET_TXQ_LOCK_ASSERT(txq);
2641
2642 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2643 sc->vtnet_link_active == 0)
2644 return;
2645
2646 vtnet_txq_eof(txq);
2647
2648 again:
2649 enq = 0;
2650
2651 while (!if_sendq_empty(ifp)) {
2652 if (virtqueue_full(vq))
2653 break;
2654
2655 m0 = if_dequeue(ifp);
2656 if (m0 == NULL)
2657 break;
2658
2659 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2660 if (m0 != NULL)
2661 if_sendq_prepend(ifp, m0);
2662 break;
2663 }
2664
2665 enq++;
2666 ETHER_BPF_MTAP(ifp, m0);
2667 }
2668
2669 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2670 if (tries++ < VTNET_NOTIFY_RETRIES)
2671 goto again;
2672
2673 txq->vtntx_stats.vtxs_rescheduled++;
2674 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2675 }
2676 }
2677
2678 static void
2679 vtnet_start(if_t ifp)
2680 {
2681 struct vtnet_softc *sc;
2682 struct vtnet_txq *txq;
2683
2684 sc = if_getsoftc(ifp);
2685 txq = &sc->vtnet_txqs[0];
2686
2687 VTNET_TXQ_LOCK(txq);
2688 vtnet_start_locked(txq, ifp);
2689 VTNET_TXQ_UNLOCK(txq);
2690 }
2691
2692 #else /* !VTNET_LEGACY_TX */
2693
2694 static int
2695 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2696 {
2697 struct vtnet_softc *sc;
2698 struct virtqueue *vq;
2699 struct buf_ring *br;
2700 if_t ifp;
2701 int enq, tries, error;
2702
2703 sc = txq->vtntx_sc;
2704 vq = txq->vtntx_vq;
2705 br = txq->vtntx_br;
2706 ifp = sc->vtnet_ifp;
2707 tries = 0;
2708 error = 0;
2709
2710 VTNET_TXQ_LOCK_ASSERT(txq);
2711
2712 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2713 sc->vtnet_link_active == 0) {
2714 if (m != NULL)
2715 error = drbr_enqueue(ifp, br, m);
2716 return (error);
2717 }
2718
2719 if (m != NULL) {
2720 error = drbr_enqueue(ifp, br, m);
2721 if (error)
2722 return (error);
2723 }
2724
2725 vtnet_txq_eof(txq);
2726
2727 again:
2728 enq = 0;
2729
2730 while ((m = drbr_peek(ifp, br)) != NULL) {
2731 if (virtqueue_full(vq)) {
2732 drbr_putback(ifp, br, m);
2733 break;
2734 }
2735
2736 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2737 if (m != NULL)
2738 drbr_putback(ifp, br, m);
2739 else
2740 drbr_advance(ifp, br);
2741 break;
2742 }
2743 drbr_advance(ifp, br);
2744
2745 enq++;
2746 ETHER_BPF_MTAP(ifp, m);
2747 }
2748
2749 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2750 if (tries++ < VTNET_NOTIFY_RETRIES)
2751 goto again;
2752
2753 txq->vtntx_stats.vtxs_rescheduled++;
2754 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2755 }
2756
2757 return (0);
2758 }
2759
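/*
 * Multiqueue transmit entry point: select a transmit queue from the mbuf
 * flow id when one is present, otherwise from the current CPU. If the
 * queue lock is contended, the packet is queued on the buf_ring and the
 * deferred task picks it up later.
 */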
2760 static int
2761 vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
2762 {
2763 struct vtnet_softc *sc;
2764 struct vtnet_txq *txq;
2765 int i, npairs, error;
2766
2767 sc = if_getsoftc(ifp);
2768 npairs = sc->vtnet_act_vq_pairs;
2769
2770 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2771 i = m->m_pkthdr.flowid % npairs;
2772 else
2773 i = curcpu % npairs;
2774
2775 txq = &sc->vtnet_txqs[i];
2776
2777 if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2778 error = vtnet_txq_mq_start_locked(txq, m);
2779 VTNET_TXQ_UNLOCK(txq);
2780 } else {
2781 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2782 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2783 }
2784
2785 return (error);
2786 }
2787
2788 static void
2789 vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
2790 {
2791 struct vtnet_softc *sc;
2792 struct vtnet_txq *txq;
2793
2794 txq = xtxq;
2795 sc = txq->vtntx_sc;
2796
2797 VTNET_TXQ_LOCK(txq);
2798 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2799 vtnet_txq_mq_start_locked(txq, NULL);
2800 VTNET_TXQ_UNLOCK(txq);
2801 }
2802
2803 #endif /* VTNET_LEGACY_TX */
2804
2805 static void
2806 vtnet_txq_start(struct vtnet_txq *txq)
2807 {
2808 struct vtnet_softc *sc;
2809 if_t ifp;
2810
2811 sc = txq->vtntx_sc;
2812 ifp = sc->vtnet_ifp;
2813
2814 #ifdef VTNET_LEGACY_TX
2815 if (!if_sendq_empty(ifp))
2816 vtnet_start_locked(txq, ifp);
2817 #else
2818 if (!drbr_empty(ifp, txq->vtntx_br))
2819 vtnet_txq_mq_start_locked(txq, NULL);
2820 #endif
2821 }
2822
2823 static void
2824 vtnet_txq_tq_intr(void *xtxq, int pending __unused)
2825 {
2826 struct vtnet_softc *sc;
2827 struct vtnet_txq *txq;
2828 if_t ifp;
2829
2830 txq = xtxq;
2831 sc = txq->vtntx_sc;
2832 ifp = sc->vtnet_ifp;
2833
2834 VTNET_TXQ_LOCK(txq);
2835
2836 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2837 VTNET_TXQ_UNLOCK(txq);
2838 return;
2839 }
2840
2841 vtnet_txq_eof(txq);
2842 vtnet_txq_start(txq);
2843
2844 VTNET_TXQ_UNLOCK(txq);
2845 }
2846
2847 static int
2848 vtnet_txq_eof(struct vtnet_txq *txq)
2849 {
2850 struct virtqueue *vq;
2851 struct vtnet_tx_header *txhdr;
2852 struct mbuf *m;
2853 int deq;
2854
2855 vq = txq->vtntx_vq;
2856 deq = 0;
2857 VTNET_TXQ_LOCK_ASSERT(txq);
2858
2859 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2860 m = txhdr->vth_mbuf;
2861 deq++;
2862
2863 txq->vtntx_stats.vtxs_opackets++;
2864 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2865 if (m->m_flags & M_MCAST)
2866 txq->vtntx_stats.vtxs_omcasts++;
2867
2868 m_freem(m);
2869 uma_zfree(vtnet_tx_header_zone, txhdr);
2870 }
2871
2872 if (virtqueue_empty(vq))
2873 txq->vtntx_watchdog = 0;
2874
2875 return (deq);
2876 }
2877
2878 static void
2879 vtnet_tx_vq_intr(void *xtxq)
2880 {
2881 struct vtnet_softc *sc;
2882 struct vtnet_txq *txq;
2883 if_t ifp;
2884
2885 txq = xtxq;
2886 sc = txq->vtntx_sc;
2887 ifp = sc->vtnet_ifp;
2888
2889 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2890 /*
2891 * Ignore this interrupt. Either this is a spurious interrupt
2892 * or multiqueue without per-VQ MSIX so every queue needs to
2893 * be polled (a brain dead configuration we could try harder
2894 * to avoid).
2895 */
2896 vtnet_txq_disable_intr(txq);
2897 return;
2898 }
2899
2900 #ifdef DEV_NETMAP
2901 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2902 return;
2903 #endif /* DEV_NETMAP */
2904
2905 VTNET_TXQ_LOCK(txq);
2906
2907 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2908 VTNET_TXQ_UNLOCK(txq);
2909 return;
2910 }
2911
2912 vtnet_txq_eof(txq);
2913 vtnet_txq_start(txq);
2914
2915 VTNET_TXQ_UNLOCK(txq);
2916 }
2917
2918 static void
2919 vtnet_tx_start_all(struct vtnet_softc *sc)
2920 {
2921 struct vtnet_txq *txq;
2922 int i;
2923
2924 VTNET_CORE_LOCK_ASSERT(sc);
2925
2926 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2927 txq = &sc->vtnet_txqs[i];
2928
2929 VTNET_TXQ_LOCK(txq);
2930 vtnet_txq_start(txq);
2931 VTNET_TXQ_UNLOCK(txq);
2932 }
2933 }
2934
2935 #ifndef VTNET_LEGACY_TX
2936 static void
2937 vtnet_qflush(if_t ifp)
2938 {
2939 struct vtnet_softc *sc;
2940 struct vtnet_txq *txq;
2941 struct mbuf *m;
2942 int i;
2943
2944 sc = if_getsoftc(ifp);
2945
2946 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2947 txq = &sc->vtnet_txqs[i];
2948
2949 VTNET_TXQ_LOCK(txq);
2950 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2951 m_freem(m);
2952 VTNET_TXQ_UNLOCK(txq);
2953 }
2954
2955 if_qflush(ifp);
2956 }
2957 #endif
2958
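/*
 * Per-queue transmit watchdog, run from vtnet_tick(). Returns 1 when the
 * watchdog counter expires without the queue making progress, in which
 * case the caller reinitializes the interface.
 */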
2959 static int
2960 vtnet_watchdog(struct vtnet_txq *txq)
2961 {
2962 if_t ifp;
2963
2964 ifp = txq->vtntx_sc->vtnet_ifp;
2965
2966 VTNET_TXQ_LOCK(txq);
2967 if (txq->vtntx_watchdog == 1) {
2968 /*
2969 * Only drain completed frames if the watchdog is about to
2970 * expire. If any frames were drained, there may be enough
2971 * free descriptors now available to transmit queued frames.
2972 * In that case, the timer will immediately be decremented
2973  * below, but the timeout is generous enough that this should
2974  * not be a problem.
2975 */
2976 if (vtnet_txq_eof(txq) != 0)
2977 vtnet_txq_start(txq);
2978 }
2979
2980 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2981 VTNET_TXQ_UNLOCK(txq);
2982 return (0);
2983 }
2984 VTNET_TXQ_UNLOCK(txq);
2985
2986 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2987 return (1);
2988 }
2989
2990 static void
2991 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2992 struct vtnet_txq_stats *txacc)
2993 {
2994
2995 bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2996 bzero(txacc, sizeof(struct vtnet_txq_stats));
2997
2998 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2999 struct vtnet_rxq_stats *rxst;
3000 struct vtnet_txq_stats *txst;
3001
3002 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
3003 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
3004 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
3005 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
3006 rxacc->vrxs_csum += rxst->vrxs_csum;
3007 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
3008 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3009
3010 txst = &sc->vtnet_txqs[i].vtntx_stats;
3011 txacc->vtxs_opackets += txst->vtxs_opackets;
3012 txacc->vtxs_obytes += txst->vtxs_obytes;
3013 txacc->vtxs_csum += txst->vtxs_csum;
3014 txacc->vtxs_tso += txst->vtxs_tso;
3015 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3016 }
3017 }
3018
3019 static uint64_t
3020 vtnet_get_counter(if_t ifp, ift_counter cnt)
3021 {
3022 struct vtnet_softc *sc;
3023 struct vtnet_rxq_stats rxaccum;
3024 struct vtnet_txq_stats txaccum;
3025
3026 sc = if_getsoftc(ifp);
3027 vtnet_accum_stats(sc, &rxaccum, &txaccum);
3028
3029 switch (cnt) {
3030 case IFCOUNTER_IPACKETS:
3031 return (rxaccum.vrxs_ipackets);
3032 case IFCOUNTER_IQDROPS:
3033 return (rxaccum.vrxs_iqdrops);
3034 case IFCOUNTER_IERRORS:
3035 return (rxaccum.vrxs_ierrors);
3036 case IFCOUNTER_OPACKETS:
3037 return (txaccum.vtxs_opackets);
3038 #ifndef VTNET_LEGACY_TX
3039 case IFCOUNTER_OBYTES:
3040 return (txaccum.vtxs_obytes);
3041 case IFCOUNTER_OMCASTS:
3042 return (txaccum.vtxs_omcasts);
3043 #endif
3044 default:
3045 return (if_get_counter_default(ifp, cnt));
3046 }
3047 }
3048
3049 static void
3050 vtnet_tick(void *xsc)
3051 {
3052 struct vtnet_softc *sc;
3053 if_t ifp;
3054 int i, timedout;
3055
3056 sc = xsc;
3057 ifp = sc->vtnet_ifp;
3058 timedout = 0;
3059
3060 VTNET_CORE_LOCK_ASSERT(sc);
3061
3062 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3063 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3064
3065 if (timedout != 0) {
3066 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3067 vtnet_init_locked(sc, 0);
3068 } else
3069 callout_schedule(&sc->vtnet_tick_ch, hz);
3070 }
3071
3072 static void
3073 vtnet_start_taskqueues(struct vtnet_softc *sc)
3074 {
3075 device_t dev;
3076 struct vtnet_rxq *rxq;
3077 struct vtnet_txq *txq;
3078 int i, error;
3079
3080 dev = sc->vtnet_dev;
3081
3082 /*
3083 * Errors here are very difficult to recover from - we cannot
3084 * easily fail because, if this is during boot, we will hang
3085 * when freeing any successfully started taskqueues because
3086 * the scheduler isn't up yet.
3087 *
3088 * Most drivers just ignore the return value - it only fails
3089 * with ENOMEM so an error is not likely.
3090 */
3091 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3092 rxq = &sc->vtnet_rxqs[i];
3093 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3094 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3095 if (error) {
3096 device_printf(dev, "failed to start rx taskq %d\n",
3097 rxq->vtnrx_id);
3098 }
3099
3100 txq = &sc->vtnet_txqs[i];
3101 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3102 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3103 if (error) {
3104 device_printf(dev, "failed to start tx taskq %d\n",
3105 txq->vtntx_id);
3106 }
3107 }
3108 }
3109
3110 static void
3111 vtnet_free_taskqueues(struct vtnet_softc *sc)
3112 {
3113 struct vtnet_rxq *rxq;
3114 struct vtnet_txq *txq;
3115 int i;
3116
3117 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3118 rxq = &sc->vtnet_rxqs[i];
3119 if (rxq->vtnrx_tq != NULL) {
3120 taskqueue_free(rxq->vtnrx_tq);
3121 rxq->vtnrx_tq = NULL;
3122 }
3123
3124 txq = &sc->vtnet_txqs[i];
3125 if (txq->vtntx_tq != NULL) {
3126 taskqueue_free(txq->vtntx_tq);
3127 txq->vtntx_tq = NULL;
3128 }
3129 }
3130 }
3131
3132 static void
3133 vtnet_drain_taskqueues(struct vtnet_softc *sc)
3134 {
3135 struct vtnet_rxq *rxq;
3136 struct vtnet_txq *txq;
3137 int i;
3138
3139 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3140 rxq = &sc->vtnet_rxqs[i];
3141 if (rxq->vtnrx_tq != NULL)
3142 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3143
3144 txq = &sc->vtnet_txqs[i];
3145 if (txq->vtntx_tq != NULL) {
3146 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3147 #ifndef VTNET_LEGACY_TX
3148 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3149 #endif
3150 }
3151 }
3152 }
3153
3154 static void
3155 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3156 {
3157 struct vtnet_rxq *rxq;
3158 struct vtnet_txq *txq;
3159 int i;
3160
3161 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3162 rxq = &sc->vtnet_rxqs[i];
3163 vtnet_rxq_free_mbufs(rxq);
3164
3165 txq = &sc->vtnet_txqs[i];
3166 vtnet_txq_free_mbufs(txq);
3167 }
3168 }
3169
3170 static void
3171 vtnet_stop_rendezvous(struct vtnet_softc *sc)
3172 {
3173 struct vtnet_rxq *rxq;
3174 struct vtnet_txq *txq;
3175 int i;
3176
3177 VTNET_CORE_LOCK_ASSERT(sc);
3178
3179 /*
3180  * Lock and unlock the per-queue mutex so we know the stop
3181 * state is visible. Doing only the active queues should be
3182 * sufficient, but it does not cost much extra to do all the
3183 * queues.
3184 */
3185 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3186 rxq = &sc->vtnet_rxqs[i];
3187 VTNET_RXQ_LOCK(rxq);
3188 VTNET_RXQ_UNLOCK(rxq);
3189
3190 txq = &sc->vtnet_txqs[i];
3191 VTNET_TXQ_LOCK(txq);
3192 VTNET_TXQ_UNLOCK(txq);
3193 }
3194 }
3195
3196 static void
3197 vtnet_stop(struct vtnet_softc *sc)
3198 {
3199 device_t dev;
3200 if_t ifp;
3201
3202 dev = sc->vtnet_dev;
3203 ifp = sc->vtnet_ifp;
3204
3205 VTNET_CORE_LOCK_ASSERT(sc);
3206
3207 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3208 sc->vtnet_link_active = 0;
3209 callout_stop(&sc->vtnet_tick_ch);
3210
3211 /* Only advisory. */
3212 vtnet_disable_interrupts(sc);
3213
3214 #ifdef DEV_NETMAP
3215 /* Stop any pending txsync/rxsync and disable them. */
3216 netmap_disable_all_rings(ifp);
3217 #endif /* DEV_NETMAP */
3218
3219 /*
3220 * Stop the host adapter. This resets it to the pre-initialized
3221 * state. It will not generate any interrupts until after it is
3222 * reinitialized.
3223 */
3224 virtio_stop(dev);
3225 vtnet_stop_rendezvous(sc);
3226
3227 vtnet_drain_rxtx_queues(sc);
3228 sc->vtnet_act_vq_pairs = 1;
3229 }
3230
3231 static int
3232 vtnet_virtio_reinit(struct vtnet_softc *sc)
3233 {
3234 device_t dev;
3235 if_t ifp;
3236 uint64_t features;
3237 int error;
3238
3239 dev = sc->vtnet_dev;
3240 ifp = sc->vtnet_ifp;
3241 features = sc->vtnet_negotiated_features;
3242
3243 /*
3244 * Re-negotiate with the host, removing any disabled receive
3245 * features. Transmit features are disabled only on our side
3246 * via if_capenable and if_hwassist.
3247 */
3248
3249 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3250 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3251
3252 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3253 features &= ~VTNET_LRO_FEATURES;
3254
3255 if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3256 features &= ~VIRTIO_NET_F_CTRL_VLAN;
3257
3258 error = virtio_reinit(dev, features);
3259 if (error) {
3260 device_printf(dev, "virtio reinit error %d\n", error);
3261 return (error);
3262 }
3263
3264 sc->vtnet_features = features;
3265 virtio_reinit_complete(dev);
3266
3267 return (0);
3268 }
3269
3270 static void
3271 vtnet_init_rx_filters(struct vtnet_softc *sc)
3272 {
3273 if_t ifp;
3274
3275 ifp = sc->vtnet_ifp;
3276
3277 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3278 vtnet_rx_filter(sc);
3279 vtnet_rx_filter_mac(sc);
3280 }
3281
3282 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3283 vtnet_rx_filter_vlan(sc);
3284 }
3285
3286 static int
3287 vtnet_init_rx_queues(struct vtnet_softc *sc)
3288 {
3289 device_t dev;
3290 if_t ifp;
3291 struct vtnet_rxq *rxq;
3292 int i, clustersz, error;
3293
3294 dev = sc->vtnet_dev;
3295 ifp = sc->vtnet_ifp;
3296
3297 clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3298 sc->vtnet_rx_clustersz = clustersz;
3299
3300 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3301 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3302 VTNET_MAX_RX_SIZE, clustersz);
3303 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3304 ("%s: too many rx mbufs %d for %d segments", __func__,
3305 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3306 } else
3307 sc->vtnet_rx_nmbufs = 1;
3308
3309 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3310 rxq = &sc->vtnet_rxqs[i];
3311
3312 /* Hold the lock to satisfy asserts. */
3313 VTNET_RXQ_LOCK(rxq);
3314 error = vtnet_rxq_populate(rxq);
3315 VTNET_RXQ_UNLOCK(rxq);
3316
3317 if (error) {
3318 device_printf(dev, "cannot populate Rx queue %d\n", i);
3319 return (error);
3320 }
3321 }
3322
3323 return (0);
3324 }
3325
3326 static int
3327 vtnet_init_tx_queues(struct vtnet_softc *sc)
3328 {
3329 struct vtnet_txq *txq;
3330 int i;
3331
3332 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3333 txq = &sc->vtnet_txqs[i];
3334 txq->vtntx_watchdog = 0;
3335 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3336 #ifdef DEV_NETMAP
3337 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3338 #endif /* DEV_NETMAP */
3339 }
3340
3341 return (0);
3342 }
3343
3344 static int
3345 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3346 {
3347 int error;
3348
3349 error = vtnet_init_rx_queues(sc);
3350 if (error)
3351 return (error);
3352
3353 error = vtnet_init_tx_queues(sc);
3354 if (error)
3355 return (error);
3356
3357 return (0);
3358 }
3359
3360 static void
3361 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3362 {
3363 device_t dev;
3364 int npairs;
3365
3366 dev = sc->vtnet_dev;
3367
3368 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3369 sc->vtnet_act_vq_pairs = 1;
3370 return;
3371 }
3372
3373 npairs = sc->vtnet_req_vq_pairs;
3374
3375 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3376 device_printf(dev, "cannot set active queue pairs to %d, "
3377 "falling back to 1 queue pair\n", npairs);
3378 npairs = 1;
3379 }
3380
3381 sc->vtnet_act_vq_pairs = npairs;
3382 }
3383
3384 static void
3385 vtnet_update_rx_offloads(struct vtnet_softc *sc)
3386 {
3387 if_t ifp;
3388 uint64_t features;
3389 int error;
3390
3391 ifp = sc->vtnet_ifp;
3392 features = sc->vtnet_features;
3393
3394 VTNET_CORE_LOCK_ASSERT(sc);
3395
3396 if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3397 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3398 features |= VIRTIO_NET_F_GUEST_CSUM;
3399 else
3400 features &= ~VIRTIO_NET_F_GUEST_CSUM;
3401 }
3402
3403 if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3404 if (if_getcapenable(ifp) & IFCAP_LRO)
3405 features |= VTNET_LRO_FEATURES;
3406 else
3407 features &= ~VTNET_LRO_FEATURES;
3408 }
3409
3410 error = vtnet_ctrl_guest_offloads(sc,
3411 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3412 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN |
3413 VIRTIO_NET_F_GUEST_UFO));
3414 if (error) {
3415 device_printf(sc->vtnet_dev,
3416 "%s: cannot update Rx features\n", __func__);
3417 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3418 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3419 vtnet_init_locked(sc, 0);
3420 }
3421 } else
3422 sc->vtnet_features = features;
3423 }
3424
3425 static int
3426 vtnet_reinit(struct vtnet_softc *sc)
3427 {
3428 if_t ifp;
3429 int error;
3430
3431 ifp = sc->vtnet_ifp;
3432
3433 bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3434
3435 error = vtnet_virtio_reinit(sc);
3436 if (error)
3437 return (error);
3438
3439 vtnet_set_macaddr(sc);
3440 vtnet_set_active_vq_pairs(sc);
3441
3442 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3443 vtnet_init_rx_filters(sc);
3444
3445 if_sethwassist(ifp, 0);
3446 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3447 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3448 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3449 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3450 if (if_getcapenable(ifp) & IFCAP_TSO4)
3451 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3452 if (if_getcapenable(ifp) & IFCAP_TSO6)
3453 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3454
3455 error = vtnet_init_rxtx_queues(sc);
3456 if (error)
3457 return (error);
3458
3459 return (0);
3460 }
3461
3462 static void
3463 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3464 {
3465 if_t ifp;
3466
3467 ifp = sc->vtnet_ifp;
3468
3469 VTNET_CORE_LOCK_ASSERT(sc);
3470
3471 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3472 return;
3473
3474 vtnet_stop(sc);
3475
3476 #ifdef DEV_NETMAP
3477 /* Once stopped we can update the netmap flags, if necessary. */
3478 switch (init_mode) {
3479 case VTNET_INIT_NETMAP_ENTER:
3480 nm_set_native_flags(NA(ifp));
3481 break;
3482 case VTNET_INIT_NETMAP_EXIT:
3483 nm_clear_native_flags(NA(ifp));
3484 break;
3485 }
3486 #endif /* DEV_NETMAP */
3487
3488 if (vtnet_reinit(sc) != 0) {
3489 vtnet_stop(sc);
3490 return;
3491 }
3492
3493 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3494 vtnet_update_link_status(sc);
3495 vtnet_enable_interrupts(sc);
3496 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3497
3498 #ifdef DEV_NETMAP
3499 /* Re-enable txsync/rxsync. */
3500 netmap_enable_all_rings(ifp);
3501 #endif /* DEV_NETMAP */
3502 }
3503
3504 static void
3505 vtnet_init(void *xsc)
3506 {
3507 struct vtnet_softc *sc;
3508
3509 sc = xsc;
3510
3511 VTNET_CORE_LOCK(sc);
3512 vtnet_init_locked(sc, 0);
3513 VTNET_CORE_UNLOCK(sc);
3514 }
3515
3516 static void
3517 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3518 {
3519
3520 /*
3521 * The control virtqueue is only polled and therefore it should
3522 * already be empty.
3523 */
3524 KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3525 ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3526 }
3527
3528 static void
3529 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3530 struct sglist *sg, int readable, int writable)
3531 {
3532 struct virtqueue *vq;
3533
3534 vq = sc->vtnet_ctrl_vq;
3535
3536 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3537 VTNET_CORE_LOCK_ASSERT(sc);
3538
3539 if (!virtqueue_empty(vq))
3540 return;
3541
3542 /*
3543  * Poll for the response; the command has likely already completed
3544  * by the time the notify returns.
3545 */
3546 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) {
3547 virtqueue_notify(vq);
3548 virtqueue_poll(vq, NULL);
3549 }
3550 }
3551
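/*
 * The control virtqueue commands below share a common layout: a
 * virtio_net_ctrl_hdr, a command-specific payload, and a single ack byte.
 * The ack is initialized to VIRTIO_NET_ERR and the device overwrites it
 * with VIRTIO_NET_OK on success, so the ack segment is the only writable
 * entry passed to vtnet_exec_ctrl_cmd().
 */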
3552 static int
3553 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3554 {
3555 struct sglist_seg segs[3];
3556 struct sglist sg;
3557 struct {
3558 struct virtio_net_ctrl_hdr hdr __aligned(2);
3559 uint8_t pad1;
3560 uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3561 uint8_t pad2;
3562 uint8_t ack;
3563 } s;
3564 int error;
3565
3566 error = 0;
3567 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3568
3569 s.hdr.class = VIRTIO_NET_CTRL_MAC;
3570 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3571 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3572 s.ack = VIRTIO_NET_ERR;
3573
3574 sglist_init(&sg, nitems(segs), segs);
3575 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3576 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3577 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3578 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3579
3580 if (error == 0)
3581 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3582
3583 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3584 }
3585
3586 static int
3587 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3588 {
3589 struct sglist_seg segs[3];
3590 struct sglist sg;
3591 struct {
3592 struct virtio_net_ctrl_hdr hdr __aligned(2);
3593 uint8_t pad1;
3594 uint64_t offloads __aligned(8);
3595 uint8_t pad2;
3596 uint8_t ack;
3597 } s;
3598 int error;
3599
3600 error = 0;
3601 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3602
3603 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3604 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3605 s.offloads = vtnet_gtoh64(sc, offloads);
3606 s.ack = VIRTIO_NET_ERR;
3607
3608 sglist_init(&sg, nitems(segs), segs);
3609 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3610 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3611 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3612 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3613
3614 if (error == 0)
3615 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3616
3617 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3618 }
3619
3620 static int
3621 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3622 {
3623 struct sglist_seg segs[3];
3624 struct sglist sg;
3625 struct {
3626 struct virtio_net_ctrl_hdr hdr __aligned(2);
3627 uint8_t pad1;
3628 struct virtio_net_ctrl_mq mq __aligned(2);
3629 uint8_t pad2;
3630 uint8_t ack;
3631 } s;
3632 int error;
3633
3634 error = 0;
3635 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3636
3637 s.hdr.class = VIRTIO_NET_CTRL_MQ;
3638 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3639 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3640 s.ack = VIRTIO_NET_ERR;
3641
3642 sglist_init(&sg, nitems(segs), segs);
3643 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3644 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3645 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3646 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3647
3648 if (error == 0)
3649 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3650
3651 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3652 }
3653
3654 static int
3655 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
3656 {
3657 struct sglist_seg segs[3];
3658 struct sglist sg;
3659 struct {
3660 struct virtio_net_ctrl_hdr hdr __aligned(2);
3661 uint8_t pad1;
3662 uint8_t onoff;
3663 uint8_t pad2;
3664 uint8_t ack;
3665 } s;
3666 int error;
3667
3668 error = 0;
3669 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3670
3671 s.hdr.class = VIRTIO_NET_CTRL_RX;
3672 s.hdr.cmd = cmd;
3673 s.onoff = on;
3674 s.ack = VIRTIO_NET_ERR;
3675
3676 sglist_init(&sg, nitems(segs), segs);
3677 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3678 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3679 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3680 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3681
3682 if (error == 0)
3683 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3684
3685 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3686 }
3687
3688 static int
3689 vtnet_set_promisc(struct vtnet_softc *sc, bool on)
3690 {
3691 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3692 }
3693
3694 static int
3695 vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
3696 {
3697 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3698 }
3699
3700 static void
3701 vtnet_rx_filter(struct vtnet_softc *sc)
3702 {
3703 device_t dev;
3704 if_t ifp;
3705
3706 dev = sc->vtnet_dev;
3707 ifp = sc->vtnet_ifp;
3708
3709 VTNET_CORE_LOCK_ASSERT(sc);
3710
3711 if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
3712 device_printf(dev, "cannot %s promiscuous mode\n",
3713 if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
3714 }
3715
3716 if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
3717 device_printf(dev, "cannot %s all-multicast mode\n",
3718 if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
3719 }
3720 }
3721
3722 static u_int
3723 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3724 {
3725 struct vtnet_softc *sc = arg;
3726
3727 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3728 return (0);
3729
3730 if (ucnt < VTNET_MAX_MAC_ENTRIES)
3731 bcopy(LLADDR(sdl),
3732 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3733 ETHER_ADDR_LEN);
3734
3735 return (1);
3736 }
3737
3738 static u_int
3739 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3740 {
3741 struct vtnet_mac_filter *filter = arg;
3742
3743 if (mcnt < VTNET_MAX_MAC_ENTRIES)
3744 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3745 ETHER_ADDR_LEN);
3746
3747 return (1);
3748 }
3749
3750 static void
3751 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3752 {
3753 struct virtio_net_ctrl_hdr hdr __aligned(2);
3754 struct vtnet_mac_filter *filter;
3755 struct sglist_seg segs[4];
3756 struct sglist sg;
3757 if_t ifp;
3758 bool promisc, allmulti;
3759 u_int ucnt, mcnt;
3760 int error;
3761 uint8_t ack;
3762
3763 ifp = sc->vtnet_ifp;
3764 filter = sc->vtnet_mac_filter;
3765 error = 0;
3766
3767 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3768 VTNET_CORE_LOCK_ASSERT(sc);
3769
3770 /* Unicast MAC addresses: */
3771 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3772 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3773
3774 if (promisc) {
3775 ucnt = 0;
3776 if_printf(ifp, "more than %d MAC addresses assigned, "
3777 "falling back to promiscuous mode\n",
3778 VTNET_MAX_MAC_ENTRIES);
3779 }
3780
3781 /* Multicast MAC addresses: */
3782 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3783 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3784
3785 if (allmulti) {
3786 mcnt = 0;
3787 if_printf(ifp, "more than %d multicast MAC addresses "
3788 "assigned, falling back to all-multicast mode\n",
3789 VTNET_MAX_MAC_ENTRIES);
3790 }
3791
3792 if (promisc && allmulti)
3793 goto out;
3794
3795 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3796 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3797
3798 hdr.class = VIRTIO_NET_CTRL_MAC;
3799 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3800 ack = VIRTIO_NET_ERR;
3801
3802 sglist_init(&sg, nitems(segs), segs);
3803 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3804 error |= sglist_append(&sg, &filter->vmf_unicast,
3805 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3806 error |= sglist_append(&sg, &filter->vmf_multicast,
3807 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3808 error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3809 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3810
3811 if (error == 0)
3812 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3813 if (ack != VIRTIO_NET_OK)
3814 if_printf(ifp, "error setting host MAC filter table\n");
3815
3816 out:
3817 if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
3818 if_printf(ifp, "cannot enable promiscuous mode\n");
3819 if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
3820 if_printf(ifp, "cannot enable all-multicast mode\n");
3821 }
3822
3823 static int
3824 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3825 {
3826 struct sglist_seg segs[3];
3827 struct sglist sg;
3828 struct {
3829 struct virtio_net_ctrl_hdr hdr __aligned(2);
3830 uint8_t pad1;
3831 uint16_t tag __aligned(2);
3832 uint8_t pad2;
3833 uint8_t ack;
3834 } s;
3835 int error;
3836
3837 error = 0;
3838 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3839
3840 s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3841 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3842 s.tag = vtnet_gtoh16(sc, tag);
3843 s.ack = VIRTIO_NET_ERR;
3844
3845 sglist_init(&sg, nitems(segs), segs);
3846 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3847 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3848 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3849 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3850
3851 if (error == 0)
3852 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3853
3854 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3855 }
3856
3857 static void
3858 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3859 {
3860 int i, bit;
3861 uint32_t w;
3862 uint16_t tag;
3863
3864 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3865 VTNET_CORE_LOCK_ASSERT(sc);
3866
3867 /* Enable the filter for each configured VLAN. */
3868 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3869 w = sc->vtnet_vlan_filter[i];
3870
3871 while ((bit = ffs(w) - 1) != -1) {
3872 w &= ~(1 << bit);
3873 tag = sizeof(w) * CHAR_BIT * i + bit;
3874
3875 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3876 device_printf(sc->vtnet_dev,
3877 "cannot enable VLAN %d filter\n", tag);
3878 }
3879 }
3880 }
3881 }
3882
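/*
 * Record a VLAN tag in the shadow table: the word index is tag / 32 and
 * the bit is tag % 32. The host filter is only updated immediately when
 * VLAN hardware filtering is enabled and the interface is running;
 * otherwise the shadow table is applied later by vtnet_rx_filter_vlan().
 */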
static void
vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
{
        if_t ifp;
        int idx, bit;

        ifp = sc->vtnet_ifp;
        idx = (tag >> 5) & 0x7F;
        bit = tag & 0x1F;

        if (tag == 0 || tag > 4095)
                return;

        VTNET_CORE_LOCK(sc);

        if (add)
                sc->vtnet_vlan_filter[idx] |= (1 << bit);
        else
                sc->vtnet_vlan_filter[idx] &= ~(1 << bit);

        if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
            if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
            vtnet_exec_vlan_filter(sc, add, tag) != 0) {
                device_printf(sc->vtnet_dev,
                    "cannot %s VLAN %d %s the host filter table\n",
                    add ? "add" : "remove", tag, add ? "to" : "from");
        }

        VTNET_CORE_UNLOCK(sc);
}

static void
vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
{

        if (if_getsoftc(ifp) != arg)
                return;

        vtnet_update_vlan_filter(arg, 1, tag);
}

static void
vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
{

        if (if_getsoftc(ifp) != arg)
                return;

        vtnet_update_vlan_filter(arg, 0, tag);
}

static void
vtnet_update_speed_duplex(struct vtnet_softc *sc)
{
        if_t ifp;
        uint32_t speed;

        ifp = sc->vtnet_ifp;

        if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
                return;

        /* BMV: Ignore duplex. */
        speed = virtio_read_dev_config_4(sc->vtnet_dev,
            offsetof(struct virtio_net_config, speed));
        if (speed != UINT32_MAX)
                if_setbaudrate(ifp, IF_Mbps(speed));
}

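/*
 * Without the VIRTIO_NET_F_STATUS feature the device cannot report link
 * state, so the link is assumed to always be up.
 */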
static int
vtnet_is_link_up(struct vtnet_softc *sc)
{
        uint16_t status;

        if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
                return (1);

        status = virtio_read_dev_config_2(sc->vtnet_dev,
            offsetof(struct virtio_net_config, status));

        return ((status & VIRTIO_NET_S_LINK_UP) != 0);
}

static void
vtnet_update_link_status(struct vtnet_softc *sc)
{
        if_t ifp;
        int link;

        ifp = sc->vtnet_ifp;
        VTNET_CORE_LOCK_ASSERT(sc);
        link = vtnet_is_link_up(sc);

        /* Notify if the link status has changed. */
        if (link != 0 && sc->vtnet_link_active == 0) {
                vtnet_update_speed_duplex(sc);
                sc->vtnet_link_active = 1;
                if_link_state_change(ifp, LINK_STATE_UP);
        } else if (link == 0 && sc->vtnet_link_active != 0) {
                sc->vtnet_link_active = 0;
                if_link_state_change(ifp, LINK_STATE_DOWN);
        }
}

static int
vtnet_ifmedia_upd(if_t ifp __unused)
{
        return (EOPNOTSUPP);
}

static void
vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
{
        struct vtnet_softc *sc;

        sc = if_getsoftc(ifp);

        ifmr->ifm_status = IFM_AVALID;
        ifmr->ifm_active = IFM_ETHER;

        VTNET_CORE_LOCK(sc);
        if (vtnet_is_link_up(sc) != 0) {
                ifmr->ifm_status |= IFM_ACTIVE;
                ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
        } else
                ifmr->ifm_active |= IFM_NONE;
        VTNET_CORE_UNLOCK(sc);
}

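/*
 * Use the MAC address provided in the device config space when the
 * VIRTIO_NET_F_MAC feature was negotiated; otherwise generate a random
 * locally administered unicast address.
 */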
static void
vtnet_get_macaddr(struct vtnet_softc *sc)
{

        if (sc->vtnet_flags & VTNET_FLAG_MAC) {
                virtio_read_device_config_array(sc->vtnet_dev,
                    offsetof(struct virtio_net_config, mac),
                    &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
        } else {
                /* Generate a random locally administered unicast address. */
                sc->vtnet_hwaddr[0] = 0xB2;
                arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
        }
}

static void
vtnet_set_macaddr(struct vtnet_softc *sc)
{
        device_t dev;
        int error;

        dev = sc->vtnet_dev;

        if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
                error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
                if (error)
                        device_printf(dev, "unable to set MAC address\n");
                return;
        }

        /* MAC in config is read-only in modern VirtIO. */
        if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
                for (int i = 0; i < ETHER_ADDR_LEN; i++) {
                        virtio_write_dev_config_1(dev,
                            offsetof(struct virtio_net_config, mac) + i,
                            sc->vtnet_hwaddr[i]);
                }
        }
}

static void
vtnet_attached_set_macaddr(struct vtnet_softc *sc)
{

        /* Assign MAC address if it was generated. */
        if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
                vtnet_set_macaddr(sc);
}

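/*
 * Copy the VLAN tag into the mbuf packet header, then strip the 4-byte
 * 802.1Q encapsulation by moving the Ethernet addresses up and trimming
 * the leading bytes from the mbuf.
 */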
static void
vtnet_vlan_tag_remove(struct mbuf *m)
{
        struct ether_vlan_header *evh;

        evh = mtod(m, struct ether_vlan_header *);
        m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
        m->m_flags |= M_VLANTAG;

        /* Strip the 802.1Q header. */
        bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
            ETHER_HDR_LEN - ETHER_TYPE_LEN);
        m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

static void
vtnet_set_rx_process_limit(struct vtnet_softc *sc)
{
        int limit;

        limit = vtnet_tunable_int(sc, "rx_process_limit",
            vtnet_rx_process_limit);
        if (limit < 0)
                limit = INT_MAX;
        sc->vtnet_rx_process_limit = limit;
}

static void
vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
{
        struct sysctl_oid *node;
        struct sysctl_oid_list *list;
        struct vtnet_rxq_stats *stats;
        char namebuf[16];

        snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
        node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
            CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
        list = SYSCTL_CHILDREN(node);

        stats = &rxq->vtnrx_stats;

        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
            &stats->vrxs_ipackets, "Receive packets");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
            &stats->vrxs_ibytes, "Receive bytes");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
            &stats->vrxs_iqdrops, "Receive drops");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
            &stats->vrxs_ierrors, "Receive errors");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
            &stats->vrxs_csum, "Receive checksum offloaded");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
            &stats->vrxs_csum_failed, "Receive checksum offload failed");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
            &stats->vrxs_host_lro, "Receive host segmentation offloaded");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
            &stats->vrxs_rescheduled,
            "Receive interrupt handler rescheduled");
}

static void
vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_txq *txq)
{
        struct sysctl_oid *node;
        struct sysctl_oid_list *list;
        struct vtnet_txq_stats *stats;
        char namebuf[16];

        snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
        node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
            CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
        list = SYSCTL_CHILDREN(node);

        stats = &txq->vtntx_stats;

        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
            &stats->vtxs_opackets, "Transmit packets");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
            &stats->vtxs_obytes, "Transmit bytes");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
            &stats->vtxs_omcasts, "Transmit multicasts");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
            &stats->vtxs_csum, "Transmit checksum offloaded");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
            &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
        SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
            &stats->vtxs_rescheduled,
            "Transmit interrupt handler rescheduled");
}

static void
vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
{
        device_t dev;
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *tree;
        struct sysctl_oid_list *child;
        int i;

        dev = sc->vtnet_dev;
        ctx = device_get_sysctl_ctx(dev);
        tree = device_get_sysctl_tree(dev);
        child = SYSCTL_CHILDREN(tree);

        for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
                vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
                vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
        }
}

static void
vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_softc *sc)
{
        struct vtnet_statistics *stats;
        struct vtnet_rxq_stats rxaccum;
        struct vtnet_txq_stats txaccum;

        vtnet_accum_stats(sc, &rxaccum, &txaccum);

        stats = &sc->vtnet_stats;
        stats->rx_csum_offloaded = rxaccum.vrxs_csum;
        stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
        stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
        stats->tx_csum_offloaded = txaccum.vtxs_csum;
        stats->tx_tso_offloaded = txaccum.vtxs_tso;
        stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;

        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
            CTLFLAG_RD, &stats->mbuf_alloc_failed,
            "Mbuf cluster allocation failures");

        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
            CTLFLAG_RD, &stats->rx_frame_too_large,
            "Received frame larger than the mbuf chain");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
            CTLFLAG_RD, &stats->rx_enq_replacement_failed,
            "Enqueuing the replacement receive mbuf failed");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
            CTLFLAG_RD, &stats->rx_mergeable_failed,
            "Mergeable buffers receive failures");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
            CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
            "Received checksum offloaded buffer with unsupported "
            "Ethernet type");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
            CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
            "Received checksum offloaded buffer with incorrect IP protocol");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
            CTLFLAG_RD, &stats->rx_csum_bad_offset,
            "Received checksum offloaded buffer with incorrect offset");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
            CTLFLAG_RD, &stats->rx_csum_bad_proto,
            "Received checksum offloaded buffer with incorrect protocol");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
            CTLFLAG_RD, &stats->rx_csum_failed,
            "Received buffer checksum offload failed");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
            CTLFLAG_RD, &stats->rx_csum_offloaded,
            "Received buffer checksum offload succeeded");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
            CTLFLAG_RD, &stats->rx_task_rescheduled,
            "Times the receive interrupt task rescheduled itself");

        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
            CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
            "Aborted transmit of checksum offloaded buffer with unknown "
            "Ethernet type");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
            CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
            "Aborted transmit of checksum offloaded buffer because mismatched "
            "protocols");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
            CTLFLAG_RD, &stats->tx_tso_not_tcp,
            "Aborted transmit of TSO buffer with non TCP protocol");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
            CTLFLAG_RD, &stats->tx_tso_without_csum,
            "Aborted transmit of TSO buffer without TCP checksum offload");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
            CTLFLAG_RD, &stats->tx_defragged,
            "Transmit mbufs defragged");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
            CTLFLAG_RD, &stats->tx_defrag_failed,
            "Aborted transmit of buffer because defrag failed");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
            CTLFLAG_RD, &stats->tx_csum_offloaded,
            "Offloaded checksum of transmitted buffer");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
            CTLFLAG_RD, &stats->tx_tso_offloaded,
            "Segmentation offload of transmitted buffer");
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
            CTLFLAG_RD, &stats->tx_task_rescheduled,
            "Times the transmit interrupt task rescheduled itself");
}

static void
vtnet_setup_sysctl(struct vtnet_softc *sc)
{
        device_t dev;
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *tree;
        struct sysctl_oid_list *child;

        dev = sc->vtnet_dev;
        ctx = device_get_sysctl_ctx(dev);
        tree = device_get_sysctl_tree(dev);
        child = SYSCTL_CHILDREN(tree);

        SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
            CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
            "Number of maximum supported virtqueue pairs");
        SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
            CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
            "Number of requested virtqueue pairs");
        SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
            CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
            "Number of active virtqueue pairs");

        vtnet_setup_stat_sysctl(ctx, child, sc);
}

static void
vtnet_load_tunables(struct vtnet_softc *sc)
{

        sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
            "lro_entry_count", vtnet_lro_entry_count);
        if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
                sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;

        sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
            "lro_mbufq_depth", vtnet_lro_mbufq_depth);
}

static int
vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{

        return (virtqueue_enable_intr(rxq->vtnrx_vq));
}

static void
vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{

        virtqueue_disable_intr(rxq->vtnrx_vq);
}

static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
        struct virtqueue *vq;

        vq = txq->vtntx_vq;

        if (vtnet_txq_below_threshold(txq) != 0)
                return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));

        /*
         * The free count is above our threshold. Keep the Tx interrupt
         * disabled until the queue is fuller.
         */
        return (0);
}

static void
vtnet_txq_disable_intr(struct vtnet_txq *txq)
{

        virtqueue_disable_intr(txq->vtntx_vq);
}

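/*
 * Re-enable receive interrupts on the active queues. A non-zero return
 * from vtnet_rxq_enable_intr() means completions are already pending, so
 * the queue's interrupt task is scheduled to process them.
 */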
static void
vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
{
        struct vtnet_rxq *rxq;
        int i;

        for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
                rxq = &sc->vtnet_rxqs[i];
                if (vtnet_rxq_enable_intr(rxq) != 0)
                        taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
        }
}

static void
vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
{
        int i;

        for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
                vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

        vtnet_enable_rx_interrupts(sc);
        vtnet_enable_tx_interrupts(sc);
}

static void
vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
{
        int i;

        for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
                vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
}

static void
vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
{
        int i;

        for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
                vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

        vtnet_disable_rx_interrupts(sc);
        vtnet_disable_tx_interrupts(sc);
}

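/*
 * Fetch a per-device tunable of the form "hw.vtnet.<unit>.<knob>",
 * falling back to the supplied default when it is not set.
 */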
static int
vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
{
        char path[64];

        snprintf(path, sizeof(path),
            "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
        TUNABLE_INT_FETCH(path, &def);

        return (def);
}

#ifdef DEBUGNET
static void
vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
{
        struct vtnet_softc *sc;

        sc = if_getsoftc(ifp);

        VTNET_CORE_LOCK(sc);
        *nrxr = sc->vtnet_req_vq_pairs;
        *ncl = DEBUGNET_MAX_IN_FLIGHT;
        *clsize = sc->vtnet_rx_clustersz;
        VTNET_CORE_UNLOCK(sc);
}

static void
vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event)
{
        struct vtnet_softc *sc;
        static bool sw_lro_enabled = false;

        /*
         * Disable software LRO, since it would require entering the network
         * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll().
         */
        sc = if_getsoftc(ifp);
        switch (event) {
        case DEBUGNET_START:
                sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
                if (sw_lro_enabled)
                        sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
                break;
        case DEBUGNET_END:
                if (sw_lro_enabled)
                        sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
                break;
        }
}

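/*
 * Debugnet transmits on the first transmit queue only and requires the
 * interface to be running and not marked output-active.
 */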
4442
4443 static int
vtnet_debugnet_transmit(if_t ifp,struct mbuf * m)4444 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
4445 {
4446 struct vtnet_softc *sc;
4447 struct vtnet_txq *txq;
4448 int error;
4449
4450 sc = if_getsoftc(ifp);
4451 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4452 IFF_DRV_RUNNING)
4453 return (EBUSY);
4454
4455 txq = &sc->vtnet_txqs[0];
4456 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4457 if (error == 0)
4458 (void)vtnet_txq_notify(txq);
4459 return (error);
4460 }
4461
4462 static int
vtnet_debugnet_poll(if_t ifp,int count)4463 vtnet_debugnet_poll(if_t ifp, int count)
4464 {
4465 struct vtnet_softc *sc;
4466 int i;
4467
4468 sc = if_getsoftc(ifp);
4469 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4470 IFF_DRV_RUNNING)
4471 return (EBUSY);
4472
4473 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4474 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4475 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4476 return (0);
4477 }
4478 #endif /* DEBUGNET */
4479