1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2019 Vincenzo Maffione <[email protected]>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 * $FreeBSD$
28 */
29
30 /*
31 * This file implements multiple network backends (tap, netmap, ...),
32 * to be used by network frontends such as virtio-net and e1000.
33 * The API to access the backend (e.g. send/receive packets, negotiate
34 * features) is exported by net_backends.h.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include <sys/types.h> /* u_short etc */
41 #ifndef WITHOUT_CAPSICUM
42 #include <sys/capsicum.h>
43 #endif
44 #include <sys/ioctl.h>
45 #include <sys/mman.h>
46 #include <sys/uio.h>
47
48 #include <net/if.h>
49 #include <net/netmap.h>
50 #include <net/netmap_virt.h>
51 #define NETMAP_WITH_LIBS
52 #include <net/netmap_user.h>
53
54 #ifndef WITHOUT_CAPSICUM
55 #include <capsicum_helpers.h>
56 #endif
57 #include <err.h>
58 #include <errno.h>
59 #include <fcntl.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <sysexits.h>
66 #include <assert.h>
67 #include <pthread.h>
68 #include <pthread_np.h>
69 #include <poll.h>
70 #include <assert.h>
71
72
73 #include "iov.h"
74 #include "mevent.h"
75 #include "net_backends.h"
76
77 #include <sys/linker_set.h>
78
79 /*
80 * Each network backend registers a set of function pointers that are
81 * used to implement the net backends API.
82 * This might need to be exposed if we implement backends in separate files.
83 */
84 struct net_backend {
85 const char *prefix; /* prefix matching this backend */
86
87 /*
88 * Routines used to initialize and cleanup the resources needed
89 * by a backend. The cleanup function is used internally,
90 * and should not be called by the frontend.
91 */
92 int (*init)(struct net_backend *be, const char *devname,
93 net_be_rxeof_t cb, void *param);
94 void (*cleanup)(struct net_backend *be);
95
96 /*
97 * Called to serve a guest transmit request. The scatter-gather
98 * vector provided by the caller has 'iovcnt' elements and contains
99 * the packet to send.
100 */
101 ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
102
103 /*
104 * Called to receive a packet from the backend. When the function
105 * returns a positive value 'len', the scatter-gather vector
106 * provided by the caller contains a packet with such length.
107 * The function returns 0 if the backend doesn't have a new packet to
108 * receive.
109 */
110 ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
111
112 /*
113 * Ask the backend for the virtio-net features it is able to
114 * support. Possible features are TSO, UFO and checksum offloading
115 * in both rx and tx direction and for both IPv4 and IPv6.
116 */
117 uint64_t (*get_cap)(struct net_backend *be);
118
119 /*
120 * Tell the backend to enable/disable the specified virtio-net
121 * features (capabilities).
122 */
123 int (*set_cap)(struct net_backend *be, uint64_t features,
124 unsigned int vnet_hdr_len);
125
126 struct pci_vtnet_softc *sc;
127 int fd;
128
129 /*
130 * Length of the virtio-net header used by the backend and the
131 * frontend, respectively. A zero value means that the header
132 * is not used.
133 */
134 unsigned int be_vnet_hdr_len;
135 unsigned int fe_vnet_hdr_len;
136
137 /* Size of backend-specific private data. */
138 size_t priv_size;
139
140 /* Room for backend-specific data. */
141 char opaque[0];
142 };
143
144 SET_DECLARE(net_backend_set, struct net_backend);
145
146 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
147
148 #define WPRINTF(params) printf params
149
150 /*
151 * The tap backend
152 */
153
/* Private state of a tap (or vmnet) backend, stored in be->opaque. */
struct tap_priv {
	/* Read event registered on the tap descriptor; NULL if none. */
	struct mevent *mevp;
};
157
158 static void
tap_cleanup(struct net_backend * be)159 tap_cleanup(struct net_backend *be)
160 {
161 struct tap_priv *priv = (struct tap_priv *)be->opaque;
162
163 if (priv->mevp) {
164 mevent_delete(priv->mevp);
165 }
166 if (be->fd != -1) {
167 close(be->fd);
168 be->fd = -1;
169 }
170 }
171
172 static int
tap_init(struct net_backend * be,const char * devname,net_be_rxeof_t cb,void * param)173 tap_init(struct net_backend *be, const char *devname,
174 net_be_rxeof_t cb, void *param)
175 {
176 struct tap_priv *priv = (struct tap_priv *)be->opaque;
177 char tbuf[80];
178 int opt = 1;
179 #ifndef WITHOUT_CAPSICUM
180 cap_rights_t rights;
181 #endif
182
183 if (cb == NULL) {
184 WPRINTF(("TAP backend requires non-NULL callback\n"));
185 return (-1);
186 }
187
188 strcpy(tbuf, "/dev/");
189 strlcat(tbuf, devname, sizeof(tbuf));
190
191 be->fd = open(tbuf, O_RDWR);
192 if (be->fd == -1) {
193 WPRINTF(("open of tap device %s failed\n", tbuf));
194 goto error;
195 }
196
197 /*
198 * Set non-blocking and register for read
199 * notifications with the event loop
200 */
201 if (ioctl(be->fd, FIONBIO, &opt) < 0) {
202 WPRINTF(("tap device O_NONBLOCK failed\n"));
203 goto error;
204 }
205
206 #ifndef WITHOUT_CAPSICUM
207 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
208 if (caph_rights_limit(be->fd, &rights) == -1)
209 errx(EX_OSERR, "Unable to apply rights for sandbox");
210 #endif
211
212 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
213 if (priv->mevp == NULL) {
214 WPRINTF(("Could not register event\n"));
215 goto error;
216 }
217
218 return (0);
219
220 error:
221 tap_cleanup(be);
222 return (-1);
223 }
224
225 /*
226 * Called to send a buffer chain out to the tap device
227 */
228 static ssize_t
tap_send(struct net_backend * be,struct iovec * iov,int iovcnt)229 tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
230 {
231 return (writev(be->fd, iov, iovcnt));
232 }
233
234 static ssize_t
tap_recv(struct net_backend * be,struct iovec * iov,int iovcnt)235 tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
236 {
237 ssize_t ret;
238
239 /* Should never be called without a valid tap fd */
240 assert(be->fd != -1);
241
242 ret = readv(be->fd, iov, iovcnt);
243
244 if (ret < 0 && errno == EWOULDBLOCK) {
245 return (0);
246 }
247
248 return (ret);
249 }
250
/*
 * Report the virtio-net features supported by the tap backend.
 * The backend argument is not used: no capabilities for now.
 */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_features = 0;

	return (no_features);
}
257
/*
 * The tap backend supports neither virtio-net features nor a virtio-net
 * header: succeed (0) only when both 'features' and 'vnet_hdr_len' are
 * zero, fail (-1) otherwise.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
    unsigned vnet_hdr_len)
{

	if (features != 0 || vnet_hdr_len != 0)
		return (-1);

	return (0);
}
265
266 static struct net_backend tap_backend = {
267 .prefix = "tap",
268 .priv_size = sizeof(struct tap_priv),
269 .init = tap_init,
270 .cleanup = tap_cleanup,
271 .send = tap_send,
272 .recv = tap_recv,
273 .get_cap = tap_get_cap,
274 .set_cap = tap_set_cap,
275 };
276
277 /* A clone of the tap backend, with a different prefix. */
278 static struct net_backend vmnet_backend = {
279 .prefix = "vmnet",
280 .priv_size = sizeof(struct tap_priv),
281 .init = tap_init,
282 .cleanup = tap_cleanup,
283 .send = tap_send,
284 .recv = tap_recv,
285 .get_cap = tap_get_cap,
286 .set_cap = tap_set_cap,
287 };
288
/* Add the tap and vmnet templates to the backend linker set. */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
291
292 /*
293 * The netmap backend
294 */
295
/*
 * The virtio-net features supported by netmap: checksum, TSO (v4/v6)
 * and UFO offloads, for both the host and the guest side.
 */
#define	NETMAP_FEATURES	(					\
	VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 |		\
	VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO |	\
	VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |	\
	VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
301
302 struct netmap_priv {
303 char ifname[IFNAMSIZ];
304 struct nm_desc *nmd;
305 uint16_t memid;
306 struct netmap_ring *rx;
307 struct netmap_ring *tx;
308 struct mevent *mevp;
309 net_be_rxeof_t cb;
310 void *cb_param;
311 };
312
313 static void
nmreq_init(struct nmreq * req,char * ifname)314 nmreq_init(struct nmreq *req, char *ifname)
315 {
316
317 memset(req, 0, sizeof(*req));
318 strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
319 req->nr_version = NETMAP_API;
320 }
321
322 static int
netmap_set_vnet_hdr_len(struct net_backend * be,int vnet_hdr_len)323 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
324 {
325 int err;
326 struct nmreq req;
327 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
328
329 nmreq_init(&req, priv->ifname);
330 req.nr_cmd = NETMAP_BDG_VNET_HDR;
331 req.nr_arg1 = vnet_hdr_len;
332 err = ioctl(be->fd, NIOCREGIF, &req);
333 if (err) {
334 WPRINTF(("Unable to set vnet header length %d\n",
335 vnet_hdr_len));
336 return (err);
337 }
338
339 be->be_vnet_hdr_len = vnet_hdr_len;
340
341 return (0);
342 }
343
344 static int
netmap_has_vnet_hdr_len(struct net_backend * be,unsigned vnet_hdr_len)345 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
346 {
347 int prev_hdr_len = be->be_vnet_hdr_len;
348 int ret;
349
350 if (vnet_hdr_len == prev_hdr_len) {
351 return (1);
352 }
353
354 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
355 if (ret) {
356 return (0);
357 }
358
359 netmap_set_vnet_hdr_len(be, prev_hdr_len);
360
361 return (1);
362 }
363
364 static uint64_t
netmap_get_cap(struct net_backend * be)365 netmap_get_cap(struct net_backend *be)
366 {
367
368 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
369 NETMAP_FEATURES : 0);
370 }
371
/*
 * Apply the negotiated configuration to the netmap backend. Only the
 * virtio-net header length is programmed; 'features' is not used.
 * Returns the result of netmap_set_vnet_hdr_len().
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
    unsigned vnet_hdr_len)
{
	int rc;

	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
379
380 static int
netmap_init(struct net_backend * be,const char * devname,net_be_rxeof_t cb,void * param)381 netmap_init(struct net_backend *be, const char *devname,
382 net_be_rxeof_t cb, void *param)
383 {
384 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
385
386 strlcpy(priv->ifname, devname, sizeof(priv->ifname));
387 priv->ifname[sizeof(priv->ifname) - 1] = '\0';
388
389 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
390 if (priv->nmd == NULL) {
391 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n",
392 devname, strerror(errno)));
393 free(priv);
394 return (-1);
395 }
396
397 priv->memid = priv->nmd->req.nr_arg2;
398 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
399 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
400 priv->cb = cb;
401 priv->cb_param = param;
402 be->fd = priv->nmd->fd;
403
404 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
405 if (priv->mevp == NULL) {
406 WPRINTF(("Could not register event\n"));
407 return (-1);
408 }
409
410 return (0);
411 }
412
413 static void
netmap_cleanup(struct net_backend * be)414 netmap_cleanup(struct net_backend *be)
415 {
416 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
417
418 if (priv->mevp) {
419 mevent_delete(priv->mevp);
420 }
421 if (priv->nmd) {
422 nm_close(priv->nmd);
423 }
424 be->fd = -1;
425 }
426
427 static ssize_t
netmap_send(struct net_backend * be,struct iovec * iov,int iovcnt)428 netmap_send(struct net_backend *be, struct iovec *iov,
429 int iovcnt)
430 {
431 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
432 struct netmap_ring *ring;
433 ssize_t totlen = 0;
434 int nm_buf_size;
435 int nm_buf_len;
436 uint32_t head;
437 void *nm_buf;
438 int j;
439
440 ring = priv->tx;
441 head = ring->head;
442 if (head == ring->tail) {
443 WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt)));
444 goto txsync;
445 }
446 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
447 nm_buf_size = ring->nr_buf_size;
448 nm_buf_len = 0;
449
450 for (j = 0; j < iovcnt; j++) {
451 int iov_frag_size = iov[j].iov_len;
452 void *iov_frag_buf = iov[j].iov_base;
453
454 totlen += iov_frag_size;
455
456 /*
457 * Split each iovec fragment over more netmap slots, if
458 * necessary.
459 */
460 for (;;) {
461 int copylen;
462
463 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
464 memcpy(nm_buf, iov_frag_buf, copylen);
465
466 iov_frag_buf += copylen;
467 iov_frag_size -= copylen;
468 nm_buf += copylen;
469 nm_buf_size -= copylen;
470 nm_buf_len += copylen;
471
472 if (iov_frag_size == 0) {
473 break;
474 }
475
476 ring->slot[head].len = nm_buf_len;
477 ring->slot[head].flags = NS_MOREFRAG;
478 head = nm_ring_next(ring, head);
479 if (head == ring->tail) {
480 /*
481 * We ran out of netmap slots while
482 * splitting the iovec fragments.
483 */
484 WPRINTF(("No space, drop %zu bytes\n",
485 count_iov(iov, iovcnt)));
486 goto txsync;
487 }
488 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
489 nm_buf_size = ring->nr_buf_size;
490 nm_buf_len = 0;
491 }
492 }
493
494 /* Complete the last slot, which must not have NS_MOREFRAG set. */
495 ring->slot[head].len = nm_buf_len;
496 ring->slot[head].flags = 0;
497 head = nm_ring_next(ring, head);
498
499 /* Now update ring->head and ring->cur. */
500 ring->head = ring->cur = head;
501 txsync:
502 ioctl(be->fd, NIOCTXSYNC, NULL);
503
504 return (totlen);
505 }
506
507 static ssize_t
netmap_recv(struct net_backend * be,struct iovec * iov,int iovcnt)508 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
509 {
510 struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
511 struct netmap_slot *slot = NULL;
512 struct netmap_ring *ring;
513 void *iov_frag_buf;
514 int iov_frag_size;
515 ssize_t totlen = 0;
516 uint32_t head;
517
518 assert(iovcnt);
519
520 ring = priv->rx;
521 head = ring->head;
522 iov_frag_buf = iov->iov_base;
523 iov_frag_size = iov->iov_len;
524
525 do {
526 int nm_buf_len;
527 void *nm_buf;
528
529 if (head == ring->tail) {
530 return (0);
531 }
532
533 slot = ring->slot + head;
534 nm_buf = NETMAP_BUF(ring, slot->buf_idx);
535 nm_buf_len = slot->len;
536
537 for (;;) {
538 int copylen = nm_buf_len < iov_frag_size ?
539 nm_buf_len : iov_frag_size;
540
541 memcpy(iov_frag_buf, nm_buf, copylen);
542 nm_buf += copylen;
543 nm_buf_len -= copylen;
544 iov_frag_buf += copylen;
545 iov_frag_size -= copylen;
546 totlen += copylen;
547
548 if (nm_buf_len == 0) {
549 break;
550 }
551
552 iov++;
553 iovcnt--;
554 if (iovcnt == 0) {
555 /* No space to receive. */
556 WPRINTF(("Short iov, drop %zd bytes\n",
557 totlen));
558 return (-ENOSPC);
559 }
560 iov_frag_buf = iov->iov_base;
561 iov_frag_size = iov->iov_len;
562 }
563
564 head = nm_ring_next(ring, head);
565
566 } while (slot->flags & NS_MOREFRAG);
567
568 /* Release slots to netmap. */
569 ring->head = ring->cur = head;
570
571 return (totlen);
572 }
573
574 static struct net_backend netmap_backend = {
575 .prefix = "netmap",
576 .priv_size = sizeof(struct netmap_priv),
577 .init = netmap_init,
578 .cleanup = netmap_cleanup,
579 .send = netmap_send,
580 .recv = netmap_recv,
581 .get_cap = netmap_get_cap,
582 .set_cap = netmap_set_cap,
583 };
584
585 /* A clone of the netmap backend, with a different prefix. */
586 static struct net_backend vale_backend = {
587 .prefix = "vale",
588 .priv_size = sizeof(struct netmap_priv),
589 .init = netmap_init,
590 .cleanup = netmap_cleanup,
591 .send = netmap_send,
592 .recv = netmap_recv,
593 .get_cap = netmap_get_cap,
594 .set_cap = netmap_set_cap,
595 };
596
/* Add the netmap and vale templates to the backend linker set. */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
599
600 /*
601 * Initialize a backend and attach to the frontend.
602 * This is called during frontend initialization.
603 * @pbe is a pointer to the backend to be initialized
604 * @devname is the backend-name as supplied on the command line,
605 * e.g. -s 2:0,frontend-name,backend-name[,other-args]
606 * @cb is the receive callback supplied by the frontend,
607 * and it is invoked in the event loop when a receive
608 * event is generated in the hypervisor,
609 * @param is a pointer to the frontend, and normally used as
610 * the argument for the callback.
611 */
612 int
netbe_init(struct net_backend ** ret,const char * devname,net_be_rxeof_t cb,void * param)613 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
614 void *param)
615 {
616 struct net_backend **pbe, *nbe, *tbe = NULL;
617 int err;
618
619 /*
620 * Find the network backend that matches the user-provided
621 * device name. net_backend_set is built using a linker set.
622 */
623 SET_FOREACH(pbe, net_backend_set) {
624 if (strncmp(devname, (*pbe)->prefix,
625 strlen((*pbe)->prefix)) == 0) {
626 tbe = *pbe;
627 assert(tbe->init != NULL);
628 assert(tbe->cleanup != NULL);
629 assert(tbe->send != NULL);
630 assert(tbe->recv != NULL);
631 assert(tbe->get_cap != NULL);
632 assert(tbe->set_cap != NULL);
633 break;
634 }
635 }
636
637 *ret = NULL;
638 if (tbe == NULL)
639 return (EINVAL);
640 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
641 *nbe = *tbe; /* copy the template */
642 nbe->fd = -1;
643 nbe->sc = param;
644 nbe->be_vnet_hdr_len = 0;
645 nbe->fe_vnet_hdr_len = 0;
646
647 /* Initialize the backend. */
648 err = nbe->init(nbe, devname, cb, param);
649 if (err) {
650 free(nbe);
651 return (err);
652 }
653
654 *ret = nbe;
655
656 return (0);
657 }
658
659 void
netbe_cleanup(struct net_backend * be)660 netbe_cleanup(struct net_backend *be)
661 {
662
663 if (be != NULL) {
664 be->cleanup(be);
665 free(be);
666 }
667 }
668
669 uint64_t
netbe_get_cap(struct net_backend * be)670 netbe_get_cap(struct net_backend *be)
671 {
672
673 assert(be != NULL);
674 return (be->get_cap(be));
675 }
676
677 int
netbe_set_cap(struct net_backend * be,uint64_t features,unsigned vnet_hdr_len)678 netbe_set_cap(struct net_backend *be, uint64_t features,
679 unsigned vnet_hdr_len)
680 {
681 int ret;
682
683 assert(be != NULL);
684
685 /* There are only three valid lengths, i.e., 0, 10 and 12. */
686 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
687 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
688 return (-1);
689
690 be->fe_vnet_hdr_len = vnet_hdr_len;
691
692 ret = be->set_cap(be, features, vnet_hdr_len);
693 assert(be->be_vnet_hdr_len == 0 ||
694 be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
695
696 return (ret);
697 }
698
/*
 * Strip the first 'tlen' bytes from a scatter-gather vector, modifying
 * the first element in place. Returns a pointer to the first element
 * of the trimmed vector; '*iovcnt' is decremented when the first
 * element is consumed entirely (the vector must then hold more than
 * one element).
 */
static __inline struct iovec *
iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
{
	struct iovec *first = &iov[0];

	/* XXX short-cut: assume first segment is >= tlen */
	assert(first->iov_len >= tlen);

	first->iov_len -= tlen;
	if (first->iov_len != 0) {
		/* Data is left in the first segment: just move past tlen. */
		first->iov_base = (void *)((uintptr_t)first->iov_base + tlen);
		return (first);
	}

	/* The first segment is used up: continue from the second one. */
	assert(*iovcnt > 1);
	*iovcnt -= 1;

	return (first + 1);
}
719
720 ssize_t
netbe_send(struct net_backend * be,struct iovec * iov,int iovcnt)721 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
722 {
723
724 assert(be != NULL);
725 if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
726 /*
727 * The frontend uses a virtio-net header, but the backend
728 * does not. We ignore it (as it must be all zeroes) and
729 * strip it.
730 */
731 assert(be->be_vnet_hdr_len == 0);
732 iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
733 }
734
735 return (be->send(be, iov, iovcnt));
736 }
737
738 /*
739 * Try to read a packet from the backend, without blocking.
740 * If no packets are available, return 0. In case of success, return
741 * the length of the packet just read. Return -1 in case of errors.
742 */
743 ssize_t
netbe_recv(struct net_backend * be,struct iovec * iov,int iovcnt)744 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
745 {
746 /* Length of prepended virtio-net header. */
747 unsigned int hlen = be->fe_vnet_hdr_len;
748 int ret;
749
750 assert(be != NULL);
751
752 if (hlen && hlen != be->be_vnet_hdr_len) {
753 /*
754 * The frontend uses a virtio-net header, but the backend
755 * does not. We need to prepend a zeroed header.
756 */
757 struct virtio_net_rxhdr *vh;
758
759 assert(be->be_vnet_hdr_len == 0);
760
761 /*
762 * Get a pointer to the rx header, and use the
763 * data immediately following it for the packet buffer.
764 */
765 vh = iov[0].iov_base;
766 iov = iov_trim(iov, &iovcnt, hlen);
767
768 /*
769 * The only valid field in the rx packet header is the
770 * number of buffers if merged rx bufs were negotiated.
771 */
772 memset(vh, 0, hlen);
773 if (hlen == VNET_HDR_LEN) {
774 vh->vrh_bufs = 1;
775 }
776 }
777
778 ret = be->recv(be, iov, iovcnt);
779 if (ret > 0) {
780 ret += hlen;
781 }
782
783 return (ret);
784 }
785
/*
 * Read a packet from the backend and discard it.
 * Returns whatever netbe_recv() returns: the size of the discarded
 * packet, zero if no packet was available, or a negative value in case
 * of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec iov = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &iov, 1));
}
807
808