/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2014-2015 Vincenzo Maffione
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
30f0ea3689SLuigi Rizzo 
31f0ea3689SLuigi Rizzo #if defined(__FreeBSD__)
32f0ea3689SLuigi Rizzo #include <sys/cdefs.h> /* prerequisite */
33f0ea3689SLuigi Rizzo 
34f0ea3689SLuigi Rizzo #include <sys/types.h>
35f0ea3689SLuigi Rizzo #include <sys/errno.h>
36f0ea3689SLuigi Rizzo #include <sys/param.h>	/* defines used in kernel.h */
37f0ea3689SLuigi Rizzo #include <sys/kernel.h>	/* types used in module initialization */
38f0ea3689SLuigi Rizzo #include <sys/sockio.h>
3937e3a6d3SLuigi Rizzo #include <sys/malloc.h>
40f0ea3689SLuigi Rizzo #include <sys/socketvar.h>	/* struct socket */
41f0ea3689SLuigi Rizzo #include <sys/socket.h> /* sockaddrs */
42f0ea3689SLuigi Rizzo #include <net/if.h>
43f0ea3689SLuigi Rizzo #include <net/if_var.h>
44f0ea3689SLuigi Rizzo #include <machine/bus.h>	/* bus_dmamap_* */
45f0ea3689SLuigi Rizzo #include <sys/endian.h>
46f0ea3689SLuigi Rizzo 
47f0ea3689SLuigi Rizzo #elif defined(linux)
48f0ea3689SLuigi Rizzo 
49f0ea3689SLuigi Rizzo #include "bsd_glue.h"
50f0ea3689SLuigi Rizzo 
51f0ea3689SLuigi Rizzo #elif defined(__APPLE__)
52f0ea3689SLuigi Rizzo 
53f0ea3689SLuigi Rizzo #warning OSX support is only partial
54f0ea3689SLuigi Rizzo #include "osx_glue.h"
55f0ea3689SLuigi Rizzo 
56f0ea3689SLuigi Rizzo #else
57f0ea3689SLuigi Rizzo 
58f0ea3689SLuigi Rizzo #error	Unsupported platform
59f0ea3689SLuigi Rizzo 
60f0ea3689SLuigi Rizzo #endif /* unsupported */
61f0ea3689SLuigi Rizzo 
62f0ea3689SLuigi Rizzo #include <net/netmap.h>
63f0ea3689SLuigi Rizzo #include <dev/netmap/netmap_kern.h>
64f0ea3689SLuigi Rizzo 
65f0ea3689SLuigi Rizzo 
66f0ea3689SLuigi Rizzo 
67f0ea3689SLuigi Rizzo /* This routine is called by bdg_mismatch_datapath() when it finishes
68f0ea3689SLuigi Rizzo  * accumulating bytes for a segment, in order to fix some fields in the
69f0ea3689SLuigi Rizzo  * segment headers (which still contain the same content as the header
7037e3a6d3SLuigi Rizzo  * of the original GSO packet). 'pkt' points to the beginning of the IP
7137e3a6d3SLuigi Rizzo  * header of the segment, while 'len' is the length of the IP packet.
72f0ea3689SLuigi Rizzo  */
7337e3a6d3SLuigi Rizzo static void
gso_fix_segment(uint8_t * pkt,size_t len,u_int ipv4,u_int iphlen,u_int tcp,u_int idx,u_int segmented_bytes,u_int last_segment)7437e3a6d3SLuigi Rizzo gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
7537e3a6d3SLuigi Rizzo 		u_int idx, u_int segmented_bytes, u_int last_segment)
76f0ea3689SLuigi Rizzo {
7737e3a6d3SLuigi Rizzo 	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
7837e3a6d3SLuigi Rizzo 	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
79f0ea3689SLuigi Rizzo 	uint16_t *check = NULL;
80f0ea3689SLuigi Rizzo 	uint8_t *check_data = NULL;
81f0ea3689SLuigi Rizzo 
8237e3a6d3SLuigi Rizzo 	if (ipv4) {
83f0ea3689SLuigi Rizzo 		/* Set the IPv4 "Total Length" field. */
8437e3a6d3SLuigi Rizzo 		iph->tot_len = htobe16(len);
85*75f4f3edSVincenzo Maffione 		nm_prdis("ip total length %u", be16toh(ip->tot_len));
86f0ea3689SLuigi Rizzo 
87f0ea3689SLuigi Rizzo 		/* Set the IPv4 "Identification" field. */
88f0ea3689SLuigi Rizzo 		iph->id = htobe16(be16toh(iph->id) + idx);
89*75f4f3edSVincenzo Maffione 		nm_prdis("ip identification %u", be16toh(iph->id));
90f0ea3689SLuigi Rizzo 
91f0ea3689SLuigi Rizzo 		/* Compute and insert the IPv4 header checksum. */
92f0ea3689SLuigi Rizzo 		iph->check = 0;
9337e3a6d3SLuigi Rizzo 		iph->check = nm_os_csum_ipv4(iph);
94*75f4f3edSVincenzo Maffione 		nm_prdis("IP csum %x", be16toh(iph->check));
9537e3a6d3SLuigi Rizzo 	} else {
96f0ea3689SLuigi Rizzo 		/* Set the IPv6 "Payload Len" field. */
9737e3a6d3SLuigi Rizzo 		ip6h->payload_len = htobe16(len-iphlen);
98f0ea3689SLuigi Rizzo 	}
99f0ea3689SLuigi Rizzo 
100f0ea3689SLuigi Rizzo 	if (tcp) {
10137e3a6d3SLuigi Rizzo 		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
102f0ea3689SLuigi Rizzo 
103f0ea3689SLuigi Rizzo 		/* Set the TCP sequence number. */
104f0ea3689SLuigi Rizzo 		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
105*75f4f3edSVincenzo Maffione 		nm_prdis("tcp seq %u", be32toh(tcph->seq));
106f0ea3689SLuigi Rizzo 
107f0ea3689SLuigi Rizzo 		/* Zero the PSH and FIN TCP flags if this is not the last
108f0ea3689SLuigi Rizzo 		   segment. */
109f0ea3689SLuigi Rizzo 		if (!last_segment)
110f0ea3689SLuigi Rizzo 			tcph->flags &= ~(0x8 | 0x1);
111*75f4f3edSVincenzo Maffione 		nm_prdis("last_segment %u", last_segment);
112f0ea3689SLuigi Rizzo 
113f0ea3689SLuigi Rizzo 		check = &tcph->check;
114f0ea3689SLuigi Rizzo 		check_data = (uint8_t *)tcph;
115f0ea3689SLuigi Rizzo 	} else { /* UDP */
11637e3a6d3SLuigi Rizzo 		struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
117f0ea3689SLuigi Rizzo 
118f0ea3689SLuigi Rizzo 		/* Set the UDP 'Length' field. */
11937e3a6d3SLuigi Rizzo 		udph->len = htobe16(len-iphlen);
120f0ea3689SLuigi Rizzo 
121f0ea3689SLuigi Rizzo 		check = &udph->check;
122f0ea3689SLuigi Rizzo 		check_data = (uint8_t *)udph;
123f0ea3689SLuigi Rizzo 	}
124f0ea3689SLuigi Rizzo 
125f0ea3689SLuigi Rizzo 	/* Compute and insert TCP/UDP checksum. */
126f0ea3689SLuigi Rizzo 	*check = 0;
12737e3a6d3SLuigi Rizzo 	if (ipv4)
12837e3a6d3SLuigi Rizzo 		nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
129f0ea3689SLuigi Rizzo 	else
13037e3a6d3SLuigi Rizzo 		nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
131f0ea3689SLuigi Rizzo 
132*75f4f3edSVincenzo Maffione 	nm_prdis("TCP/UDP csum %x", be16toh(*check));
133f0ea3689SLuigi Rizzo }
134f0ea3689SLuigi Rizzo 
1354f80b14cSVincenzo Maffione static inline int
vnet_hdr_is_bad(struct nm_vnet_hdr * vh)13637e3a6d3SLuigi Rizzo vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
13737e3a6d3SLuigi Rizzo {
13837e3a6d3SLuigi Rizzo 	uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
13937e3a6d3SLuigi Rizzo 
14037e3a6d3SLuigi Rizzo 	return (
14137e3a6d3SLuigi Rizzo 		(gso_type != VIRTIO_NET_HDR_GSO_NONE &&
14237e3a6d3SLuigi Rizzo 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
14337e3a6d3SLuigi Rizzo 		 gso_type != VIRTIO_NET_HDR_GSO_UDP &&
14437e3a6d3SLuigi Rizzo 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
14537e3a6d3SLuigi Rizzo 		||
14637e3a6d3SLuigi Rizzo 		 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
14737e3a6d3SLuigi Rizzo 			       | VIRTIO_NET_HDR_F_DATA_VALID))
14837e3a6d3SLuigi Rizzo 	       );
14937e3a6d3SLuigi Rizzo }
150f0ea3689SLuigi Rizzo 
151f0ea3689SLuigi Rizzo /* The VALE mismatch datapath implementation. */
15237e3a6d3SLuigi Rizzo void
bdg_mismatch_datapath(struct netmap_vp_adapter * na,struct netmap_vp_adapter * dst_na,const struct nm_bdg_fwd * ft_p,struct netmap_ring * dst_ring,u_int * j,u_int lim,u_int * howmany)15337e3a6d3SLuigi Rizzo bdg_mismatch_datapath(struct netmap_vp_adapter *na,
154f0ea3689SLuigi Rizzo 		      struct netmap_vp_adapter *dst_na,
15537e3a6d3SLuigi Rizzo 		      const struct nm_bdg_fwd *ft_p,
15637e3a6d3SLuigi Rizzo 		      struct netmap_ring *dst_ring,
157f0ea3689SLuigi Rizzo 		      u_int *j, u_int lim, u_int *howmany)
158f0ea3689SLuigi Rizzo {
15937e3a6d3SLuigi Rizzo 	struct netmap_slot *dst_slot = NULL;
160f0ea3689SLuigi Rizzo 	struct nm_vnet_hdr *vh = NULL;
16137e3a6d3SLuigi Rizzo 	const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
162f0ea3689SLuigi Rizzo 
163f0ea3689SLuigi Rizzo 	/* Source and destination pointers. */
164f0ea3689SLuigi Rizzo 	uint8_t *dst, *src;
165f0ea3689SLuigi Rizzo 	size_t src_len, dst_len;
166f0ea3689SLuigi Rizzo 
16737e3a6d3SLuigi Rizzo 	/* Indices and counters for the destination ring. */
168f0ea3689SLuigi Rizzo 	u_int j_start = *j;
16937e3a6d3SLuigi Rizzo 	u_int j_cur = j_start;
170f0ea3689SLuigi Rizzo 	u_int dst_slots = 0;
171f0ea3689SLuigi Rizzo 
17237e3a6d3SLuigi Rizzo 	if (unlikely(ft_p == ft_end)) {
173*75f4f3edSVincenzo Maffione 		nm_prlim(1, "No source slots to process");
17437e3a6d3SLuigi Rizzo 		return;
175f0ea3689SLuigi Rizzo 	}
176f0ea3689SLuigi Rizzo 
177f0ea3689SLuigi Rizzo 	/* Init source and dest pointers. */
178f0ea3689SLuigi Rizzo 	src = ft_p->ft_buf;
179f0ea3689SLuigi Rizzo 	src_len = ft_p->ft_len;
18037e3a6d3SLuigi Rizzo 	dst_slot = &dst_ring->slot[j_cur];
18137e3a6d3SLuigi Rizzo 	dst = NMB(&dst_na->up, dst_slot);
182f0ea3689SLuigi Rizzo 	dst_len = src_len;
183f0ea3689SLuigi Rizzo 
18437e3a6d3SLuigi Rizzo 	/* If the source port uses the offloadings, while destination doesn't,
18537e3a6d3SLuigi Rizzo 	 * we grab the source virtio-net header and do the offloadings here.
18637e3a6d3SLuigi Rizzo 	 */
18737e3a6d3SLuigi Rizzo 	if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
18837e3a6d3SLuigi Rizzo 		vh = (struct nm_vnet_hdr *)src;
18937e3a6d3SLuigi Rizzo 		/* Initial sanity check on the source virtio-net header. If
19037e3a6d3SLuigi Rizzo 		 * something seems wrong, just drop the packet. */
19137e3a6d3SLuigi Rizzo 		if (src_len < na->up.virt_hdr_len) {
192*75f4f3edSVincenzo Maffione 			nm_prlim(1, "Short src vnet header, dropping");
19337e3a6d3SLuigi Rizzo 			return;
19437e3a6d3SLuigi Rizzo 		}
1954f80b14cSVincenzo Maffione 		if (unlikely(vnet_hdr_is_bad(vh))) {
196*75f4f3edSVincenzo Maffione 			nm_prlim(1, "Bad src vnet header, dropping");
19737e3a6d3SLuigi Rizzo 			return;
19837e3a6d3SLuigi Rizzo 		}
19937e3a6d3SLuigi Rizzo 	}
20037e3a6d3SLuigi Rizzo 
201f0ea3689SLuigi Rizzo 	/* We are processing the first input slot and there is a mismatch
202f0ea3689SLuigi Rizzo 	 * between source and destination virt_hdr_len (SHL and DHL).
203f0ea3689SLuigi Rizzo 	 * When the a client is using virtio-net headers, the header length
204f0ea3689SLuigi Rizzo 	 * can be:
205f0ea3689SLuigi Rizzo 	 *    - 10: the header corresponds to the struct nm_vnet_hdr
206f0ea3689SLuigi Rizzo 	 *    - 12: the first 10 bytes correspond to the struct
207f0ea3689SLuigi Rizzo 	 *          virtio_net_hdr, and the last 2 bytes store the
208f0ea3689SLuigi Rizzo 	 *          "mergeable buffers" info, which is an optional
209453130d9SPedro F. Giffuni 	 *	    hint that can be zeroed for compatibility
210f0ea3689SLuigi Rizzo 	 *
211f0ea3689SLuigi Rizzo 	 * The destination header is therefore built according to the
212f0ea3689SLuigi Rizzo 	 * following table:
213f0ea3689SLuigi Rizzo 	 *
214f0ea3689SLuigi Rizzo 	 * SHL | DHL | destination header
215f0ea3689SLuigi Rizzo 	 * -----------------------------
216f0ea3689SLuigi Rizzo 	 *   0 |  10 | zero
217f0ea3689SLuigi Rizzo 	 *   0 |  12 | zero
218f0ea3689SLuigi Rizzo 	 *  10 |   0 | doesn't exist
219f0ea3689SLuigi Rizzo 	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
220f0ea3689SLuigi Rizzo 	 *  12 |   0 | doesn't exist
221f0ea3689SLuigi Rizzo 	 *  12 |  10 | copied from the first 10 bytes of source header
222f0ea3689SLuigi Rizzo 	 */
22337e3a6d3SLuigi Rizzo 	bzero(dst, dst_na->up.virt_hdr_len);
22437e3a6d3SLuigi Rizzo 	if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
225f0ea3689SLuigi Rizzo 		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
226f0ea3689SLuigi Rizzo 	/* Skip the virtio-net headers. */
22737e3a6d3SLuigi Rizzo 	src += na->up.virt_hdr_len;
22837e3a6d3SLuigi Rizzo 	src_len -= na->up.virt_hdr_len;
22937e3a6d3SLuigi Rizzo 	dst += dst_na->up.virt_hdr_len;
23037e3a6d3SLuigi Rizzo 	dst_len = dst_na->up.virt_hdr_len + src_len;
231f0ea3689SLuigi Rizzo 
232f0ea3689SLuigi Rizzo 	/* Here it could be dst_len == 0 (which implies src_len == 0),
233f0ea3689SLuigi Rizzo 	 * so we avoid passing a zero length fragment.
234f0ea3689SLuigi Rizzo 	 */
235f0ea3689SLuigi Rizzo 	if (dst_len == 0) {
236f0ea3689SLuigi Rizzo 		ft_p++;
237f0ea3689SLuigi Rizzo 		src = ft_p->ft_buf;
238f0ea3689SLuigi Rizzo 		src_len = ft_p->ft_len;
239f0ea3689SLuigi Rizzo 		dst_len = src_len;
240f0ea3689SLuigi Rizzo 	}
241f0ea3689SLuigi Rizzo 
242f0ea3689SLuigi Rizzo 	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
243f0ea3689SLuigi Rizzo 		u_int gso_bytes = 0;
244f0ea3689SLuigi Rizzo 		/* Length of the GSO packet header. */
245f0ea3689SLuigi Rizzo 		u_int gso_hdr_len = 0;
246f0ea3689SLuigi Rizzo 		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
247f0ea3689SLuigi Rizzo 		uint8_t *gso_hdr = NULL;
248f0ea3689SLuigi Rizzo 		/* Index of the current segment. */
249f0ea3689SLuigi Rizzo 		u_int gso_idx = 0;
250f0ea3689SLuigi Rizzo 		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
251f0ea3689SLuigi Rizzo 		u_int segmented_bytes = 0;
25237e3a6d3SLuigi Rizzo 		/* Is this an IPv4 or IPv6 GSO packet? */
25337e3a6d3SLuigi Rizzo 		u_int ipv4 = 0;
254f0ea3689SLuigi Rizzo 		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
255f0ea3689SLuigi Rizzo 		u_int iphlen = 0;
25637e3a6d3SLuigi Rizzo 		/* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
25737e3a6d3SLuigi Rizzo 		u_int ethhlen = 14;
258f0ea3689SLuigi Rizzo 		/* Is this a TCP or an UDP GSO packet? */
259f0ea3689SLuigi Rizzo 		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
260f0ea3689SLuigi Rizzo 				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
261f0ea3689SLuigi Rizzo 
262f0ea3689SLuigi Rizzo 		/* Segment the GSO packet contained into the input slots (frags). */
26337e3a6d3SLuigi Rizzo 		for (;;) {
264f0ea3689SLuigi Rizzo 			size_t copy;
265f0ea3689SLuigi Rizzo 
26637e3a6d3SLuigi Rizzo 			if (dst_slots >= *howmany) {
26737e3a6d3SLuigi Rizzo 				/* We still have work to do, but we've run out of
26837e3a6d3SLuigi Rizzo 				 * dst slots, so we have to drop the packet. */
269*75f4f3edSVincenzo Maffione 				nm_prdis(1, "Not enough slots, dropping GSO packet");
27037e3a6d3SLuigi Rizzo 				return;
27137e3a6d3SLuigi Rizzo 			}
27237e3a6d3SLuigi Rizzo 
273f0ea3689SLuigi Rizzo 			/* Grab the GSO header if we don't have it. */
274f0ea3689SLuigi Rizzo 			if (!gso_hdr) {
275f0ea3689SLuigi Rizzo 				uint16_t ethertype;
276f0ea3689SLuigi Rizzo 
277f0ea3689SLuigi Rizzo 				gso_hdr = src;
278f0ea3689SLuigi Rizzo 
279f0ea3689SLuigi Rizzo 				/* Look at the 'Ethertype' field to see if this packet
28037e3a6d3SLuigi Rizzo 				 * is IPv4 or IPv6, taking into account VLAN
28137e3a6d3SLuigi Rizzo 				 * encapsulation. */
28237e3a6d3SLuigi Rizzo 				for (;;) {
28337e3a6d3SLuigi Rizzo 					if (src_len < ethhlen) {
284*75f4f3edSVincenzo Maffione 						nm_prlim(1, "Short GSO fragment [eth], dropping");
28537e3a6d3SLuigi Rizzo 						return;
28637e3a6d3SLuigi Rizzo 					}
28737e3a6d3SLuigi Rizzo 					ethertype = be16toh(*((uint16_t *)
28837e3a6d3SLuigi Rizzo 							    (gso_hdr + ethhlen - 2)));
28937e3a6d3SLuigi Rizzo 					if (ethertype != 0x8100) /* not 802.1q */
29037e3a6d3SLuigi Rizzo 						break;
29137e3a6d3SLuigi Rizzo 					ethhlen += 4;
29237e3a6d3SLuigi Rizzo 				}
29337e3a6d3SLuigi Rizzo 				switch (ethertype) {
29437e3a6d3SLuigi Rizzo 					case 0x0800:  /* IPv4 */
29537e3a6d3SLuigi Rizzo 					{
29637e3a6d3SLuigi Rizzo 						struct nm_iphdr *iph = (struct nm_iphdr *)
29737e3a6d3SLuigi Rizzo 									(gso_hdr + ethhlen);
29837e3a6d3SLuigi Rizzo 
29937e3a6d3SLuigi Rizzo 						if (src_len < ethhlen + 20) {
300*75f4f3edSVincenzo Maffione 							nm_prlim(1, "Short GSO fragment "
30137e3a6d3SLuigi Rizzo 							      "[IPv4], dropping");
30237e3a6d3SLuigi Rizzo 							return;
30337e3a6d3SLuigi Rizzo 						}
30437e3a6d3SLuigi Rizzo 						ipv4 = 1;
30537e3a6d3SLuigi Rizzo 						iphlen = 4 * (iph->version_ihl & 0x0F);
30637e3a6d3SLuigi Rizzo 						break;
30737e3a6d3SLuigi Rizzo 					}
30837e3a6d3SLuigi Rizzo 					case 0x86DD:  /* IPv6 */
30937e3a6d3SLuigi Rizzo 						ipv4 = 0;
310f0ea3689SLuigi Rizzo 						iphlen = 40;
31137e3a6d3SLuigi Rizzo 						break;
31237e3a6d3SLuigi Rizzo 					default:
313*75f4f3edSVincenzo Maffione 						nm_prlim(1, "Unsupported ethertype, "
31437e3a6d3SLuigi Rizzo 						      "dropping GSO packet");
31537e3a6d3SLuigi Rizzo 						return;
31637e3a6d3SLuigi Rizzo 				}
317*75f4f3edSVincenzo Maffione 				nm_prdis(3, "type=%04x", ethertype);
318f0ea3689SLuigi Rizzo 
31937e3a6d3SLuigi Rizzo 				if (src_len < ethhlen + iphlen) {
320*75f4f3edSVincenzo Maffione 					nm_prlim(1, "Short GSO fragment [IP], dropping");
32137e3a6d3SLuigi Rizzo 					return;
32237e3a6d3SLuigi Rizzo 				}
32337e3a6d3SLuigi Rizzo 
324f0ea3689SLuigi Rizzo 				/* Compute gso_hdr_len. For TCP we need to read the
325f0ea3689SLuigi Rizzo 				 * content of the 'Data Offset' field.
326f0ea3689SLuigi Rizzo 				 */
327f0ea3689SLuigi Rizzo 				if (tcp) {
32837e3a6d3SLuigi Rizzo 					struct nm_tcphdr *tcph = (struct nm_tcphdr *)
32937e3a6d3SLuigi Rizzo 								(gso_hdr + ethhlen + iphlen);
330f0ea3689SLuigi Rizzo 
33137e3a6d3SLuigi Rizzo 					if (src_len < ethhlen + iphlen + 20) {
332*75f4f3edSVincenzo Maffione 						nm_prlim(1, "Short GSO fragment "
33337e3a6d3SLuigi Rizzo 								"[TCP], dropping");
33437e3a6d3SLuigi Rizzo 						return;
33537e3a6d3SLuigi Rizzo 					}
33637e3a6d3SLuigi Rizzo 					gso_hdr_len = ethhlen + iphlen +
33737e3a6d3SLuigi Rizzo 						      4 * (tcph->doff >> 4);
33837e3a6d3SLuigi Rizzo 				} else {
33937e3a6d3SLuigi Rizzo 					gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
34037e3a6d3SLuigi Rizzo 				}
34137e3a6d3SLuigi Rizzo 
34237e3a6d3SLuigi Rizzo 				if (src_len < gso_hdr_len) {
343*75f4f3edSVincenzo Maffione 					nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
34437e3a6d3SLuigi Rizzo 					return;
34537e3a6d3SLuigi Rizzo 				}
346f0ea3689SLuigi Rizzo 
347*75f4f3edSVincenzo Maffione 				nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
348f0ea3689SLuigi Rizzo 								   dst_na->mfs);
349f0ea3689SLuigi Rizzo 
350f0ea3689SLuigi Rizzo 				/* Advance source pointers. */
351f0ea3689SLuigi Rizzo 				src += gso_hdr_len;
352f0ea3689SLuigi Rizzo 				src_len -= gso_hdr_len;
353f0ea3689SLuigi Rizzo 				if (src_len == 0) {
354f0ea3689SLuigi Rizzo 					ft_p++;
355f0ea3689SLuigi Rizzo 					if (ft_p == ft_end)
356f0ea3689SLuigi Rizzo 						break;
357f0ea3689SLuigi Rizzo 					src = ft_p->ft_buf;
358f0ea3689SLuigi Rizzo 					src_len = ft_p->ft_len;
359f0ea3689SLuigi Rizzo 				}
360f0ea3689SLuigi Rizzo 			}
361f0ea3689SLuigi Rizzo 
362f0ea3689SLuigi Rizzo 			/* Fill in the header of the current segment. */
363f0ea3689SLuigi Rizzo 			if (gso_bytes == 0) {
364f0ea3689SLuigi Rizzo 				memcpy(dst, gso_hdr, gso_hdr_len);
365f0ea3689SLuigi Rizzo 				gso_bytes = gso_hdr_len;
366f0ea3689SLuigi Rizzo 			}
367f0ea3689SLuigi Rizzo 
368f0ea3689SLuigi Rizzo 			/* Fill in data and update source and dest pointers. */
369f0ea3689SLuigi Rizzo 			copy = src_len;
370f0ea3689SLuigi Rizzo 			if (gso_bytes + copy > dst_na->mfs)
371f0ea3689SLuigi Rizzo 				copy = dst_na->mfs - gso_bytes;
372f0ea3689SLuigi Rizzo 			memcpy(dst + gso_bytes, src, copy);
373f0ea3689SLuigi Rizzo 			gso_bytes += copy;
374f0ea3689SLuigi Rizzo 			src += copy;
375f0ea3689SLuigi Rizzo 			src_len -= copy;
376f0ea3689SLuigi Rizzo 
377f0ea3689SLuigi Rizzo 			/* A segment is complete or we have processed all the
378f0ea3689SLuigi Rizzo 			   the GSO payload bytes. */
379f0ea3689SLuigi Rizzo 			if (gso_bytes >= dst_na->mfs ||
380f0ea3689SLuigi Rizzo 				(src_len == 0 && ft_p + 1 == ft_end)) {
381f0ea3689SLuigi Rizzo 				/* After raw segmentation, we must fix some header
382f0ea3689SLuigi Rizzo 				 * fields and compute checksums, in a protocol dependent
383f0ea3689SLuigi Rizzo 				 * way. */
38437e3a6d3SLuigi Rizzo 				gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
38537e3a6d3SLuigi Rizzo 						ipv4, iphlen, tcp,
38637e3a6d3SLuigi Rizzo 						gso_idx, segmented_bytes,
38737e3a6d3SLuigi Rizzo 						src_len == 0 && ft_p + 1 == ft_end);
388f0ea3689SLuigi Rizzo 
389*75f4f3edSVincenzo Maffione 				nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
39037e3a6d3SLuigi Rizzo 				dst_slot->len = gso_bytes;
39137e3a6d3SLuigi Rizzo 				dst_slot->flags = 0;
392f0ea3689SLuigi Rizzo 				dst_slots++;
39337e3a6d3SLuigi Rizzo 				segmented_bytes += gso_bytes - gso_hdr_len;
394f0ea3689SLuigi Rizzo 
395f0ea3689SLuigi Rizzo 				gso_bytes = 0;
396f0ea3689SLuigi Rizzo 				gso_idx++;
39737e3a6d3SLuigi Rizzo 
39837e3a6d3SLuigi Rizzo 				/* Next destination slot. */
39937e3a6d3SLuigi Rizzo 				j_cur = nm_next(j_cur, lim);
40037e3a6d3SLuigi Rizzo 				dst_slot = &dst_ring->slot[j_cur];
40137e3a6d3SLuigi Rizzo 				dst = NMB(&dst_na->up, dst_slot);
402f0ea3689SLuigi Rizzo 			}
403f0ea3689SLuigi Rizzo 
404f0ea3689SLuigi Rizzo 			/* Next input slot. */
405f0ea3689SLuigi Rizzo 			if (src_len == 0) {
406f0ea3689SLuigi Rizzo 				ft_p++;
407f0ea3689SLuigi Rizzo 				if (ft_p == ft_end)
408f0ea3689SLuigi Rizzo 					break;
409f0ea3689SLuigi Rizzo 				src = ft_p->ft_buf;
410f0ea3689SLuigi Rizzo 				src_len = ft_p->ft_len;
411f0ea3689SLuigi Rizzo 			}
412f0ea3689SLuigi Rizzo 		}
413*75f4f3edSVincenzo Maffione 		nm_prdis(3, "%d bytes segmented", segmented_bytes);
414f0ea3689SLuigi Rizzo 
415f0ea3689SLuigi Rizzo 	} else {
416f0ea3689SLuigi Rizzo 		/* Address of a checksum field into a destination slot. */
417f0ea3689SLuigi Rizzo 		uint16_t *check = NULL;
418f0ea3689SLuigi Rizzo 		/* Accumulator for an unfolded checksum. */
419f0ea3689SLuigi Rizzo 		rawsum_t csum = 0;
420f0ea3689SLuigi Rizzo 
421f0ea3689SLuigi Rizzo 		/* Process a non-GSO packet. */
422f0ea3689SLuigi Rizzo 
423f0ea3689SLuigi Rizzo 		/* Init 'check' if necessary. */
424f0ea3689SLuigi Rizzo 		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
425f0ea3689SLuigi Rizzo 			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
426*75f4f3edSVincenzo Maffione 				nm_prerr("invalid checksum request");
427f0ea3689SLuigi Rizzo 			else
428f0ea3689SLuigi Rizzo 				check = (uint16_t *)(dst + vh->csum_start +
429f0ea3689SLuigi Rizzo 						vh->csum_offset);
430f0ea3689SLuigi Rizzo 		}
431f0ea3689SLuigi Rizzo 
432f0ea3689SLuigi Rizzo 		while (ft_p != ft_end) {
433f0ea3689SLuigi Rizzo 			/* Init/update the packet checksum if needed. */
434f0ea3689SLuigi Rizzo 			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
435f0ea3689SLuigi Rizzo 				if (!dst_slots)
43637e3a6d3SLuigi Rizzo 					csum = nm_os_csum_raw(src + vh->csum_start,
437f0ea3689SLuigi Rizzo 								src_len - vh->csum_start, 0);
438f0ea3689SLuigi Rizzo 				else
43937e3a6d3SLuigi Rizzo 					csum = nm_os_csum_raw(src, src_len, csum);
440f0ea3689SLuigi Rizzo 			}
441f0ea3689SLuigi Rizzo 
442f0ea3689SLuigi Rizzo 			/* Round to a multiple of 64 */
443f0ea3689SLuigi Rizzo 			src_len = (src_len + 63) & ~63;
444f0ea3689SLuigi Rizzo 
445f0ea3689SLuigi Rizzo 			if (ft_p->ft_flags & NS_INDIRECT) {
446f0ea3689SLuigi Rizzo 				if (copyin(src, dst, src_len)) {
447f0ea3689SLuigi Rizzo 					/* Invalid user pointer, pretend len is 0. */
448f0ea3689SLuigi Rizzo 					dst_len = 0;
449f0ea3689SLuigi Rizzo 				}
450f0ea3689SLuigi Rizzo 			} else {
451f0ea3689SLuigi Rizzo 				memcpy(dst, src, (int)src_len);
452f0ea3689SLuigi Rizzo 			}
45337e3a6d3SLuigi Rizzo 			dst_slot->len = dst_len;
454f0ea3689SLuigi Rizzo 			dst_slots++;
455f0ea3689SLuigi Rizzo 
456f0ea3689SLuigi Rizzo 			/* Next destination slot. */
45737e3a6d3SLuigi Rizzo 			j_cur = nm_next(j_cur, lim);
45837e3a6d3SLuigi Rizzo 			dst_slot = &dst_ring->slot[j_cur];
45937e3a6d3SLuigi Rizzo 			dst = NMB(&dst_na->up, dst_slot);
460f0ea3689SLuigi Rizzo 
461f0ea3689SLuigi Rizzo 			/* Next source slot. */
462f0ea3689SLuigi Rizzo 			ft_p++;
463f0ea3689SLuigi Rizzo 			src = ft_p->ft_buf;
464f0ea3689SLuigi Rizzo 			dst_len = src_len = ft_p->ft_len;
465f0ea3689SLuigi Rizzo 		}
466f0ea3689SLuigi Rizzo 
467f0ea3689SLuigi Rizzo 		/* Finalize (fold) the checksum if needed. */
468f0ea3689SLuigi Rizzo 		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
46937e3a6d3SLuigi Rizzo 			*check = nm_os_csum_fold(csum);
470f0ea3689SLuigi Rizzo 		}
471*75f4f3edSVincenzo Maffione 		nm_prdis(3, "using %u dst_slots", dst_slots);
472f0ea3689SLuigi Rizzo 
47337e3a6d3SLuigi Rizzo 		/* A second pass on the destination slots to set the slot flags,
474f0ea3689SLuigi Rizzo 		 * using the right number of destination slots.
475f0ea3689SLuigi Rizzo 		 */
47637e3a6d3SLuigi Rizzo 		while (j_start != j_cur) {
47737e3a6d3SLuigi Rizzo 			dst_slot = &dst_ring->slot[j_start];
47837e3a6d3SLuigi Rizzo 			dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
479f0ea3689SLuigi Rizzo 			j_start = nm_next(j_start, lim);
480f0ea3689SLuigi Rizzo 		}
481f0ea3689SLuigi Rizzo 		/* Clear NS_MOREFRAG flag on last entry. */
48237e3a6d3SLuigi Rizzo 		dst_slot->flags = (dst_slots << 8);
483f0ea3689SLuigi Rizzo 	}
484f0ea3689SLuigi Rizzo 
48537e3a6d3SLuigi Rizzo 	/* Update howmany and j. This is to commit the use of
48637e3a6d3SLuigi Rizzo 	 * those slots in the destination ring. */
487f0ea3689SLuigi Rizzo 	if (unlikely(dst_slots > *howmany)) {
488*75f4f3edSVincenzo Maffione 		nm_prerr("bug: slot allocation error");
489f0ea3689SLuigi Rizzo 	}
49037e3a6d3SLuigi Rizzo 	*j = j_cur;
491f0ea3689SLuigi Rizzo 	*howmany -= dst_slots;
492f0ea3689SLuigi Rizzo }
493