1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2019 Isilon Systems, LLC.
5 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
6 * Copyright (c) 2000 Darrell Anderson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_ddb.h"
35 #include "opt_inet.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/endian.h>
40 #include <sys/errno.h>
41 #include <sys/eventhandler.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
47
48 #ifdef DDB
49 #include <ddb/ddb.h>
50 #include <ddb/db_lex.h>
51 #endif
52
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58 #include <net/if_var.h>
59 #include <net/vnet.h>
60 #include <net/route.h>
61 #include <net/route/nhop.h>
62
63 #include <netinet/in.h>
64 #include <netinet/in_fib.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/ip_options.h>
70 #include <netinet/udp.h>
71 #include <netinet/udp_var.h>
72
73 #include <machine/in_cksum.h>
74 #include <machine/pcb.h>
75
76 #include <net/debugnet.h>
77 #define DEBUGNET_INTERNAL
78 #include <net/debugnet_int.h>
79
/* Declare "debugnet" via the kernel FEATURE() mechanism. */
FEATURE(debugnet, "Debugnet support");

/* Root of the net.debugnet sysctl tree; the knobs below hang off it. */
SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "debugnet parameters");

/* net.debugnet.debug: verbosity of debug messages. */
unsigned debugnet_debug;
SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debugnet_debug, 0,
    "Debug message verbosity (0: off; 1: on; 2: verbose)");

/*
 * net.debugnet.npolls: number of poll iterations debugnet_send() waits for
 * acknowledgements before it retransmits.
 */
int debugnet_npolls = 2000;
SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
    &debugnet_npolls, 0,
    "Number of times to poll before assuming packet loss (0.5ms per poll)");
/*
 * net.debugnet.nretries: retransmission rounds debugnet_send() attempts
 * before it gives up with ETIMEDOUT.
 */
int debugnet_nretries = 10;
SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
    &debugnet_nretries, 0,
    "Number of retransmit attempts before giving up");
/* net.debugnet.fib: FIB used by the route lookup in debugnet_connect(). */
int debugnet_fib = RT_DEFAULT_FIB;
SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
    &debugnet_fib, 0,
    "Fib to use when sending dump");

/*
 * The single, statically allocated connection block and the flag recording
 * whether it is currently claimed.  debugnet_connect() refuses to start a
 * second connection while the flag is set.
 */
static bool g_debugnet_pcb_inuse;
static struct debugnet_pcb g_dnet_pcb;
105
106 /*
107 * Simple accessors for opaque PCB.
108 */
109 const unsigned char *
debugnet_get_gw_mac(const struct debugnet_pcb * pcb)110 debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
111 {
112 MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
113 pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
114 return (pcb->dp_gw_mac.octet);
115 }
116
117 /*
118 * Start of network primitives, beginning with output primitives.
119 */
120
121 /*
122 * Handles creation of the ethernet header, then places outgoing packets into
123 * the tx buffer for the NIC
124 *
125 * Parameters:
126 * m The mbuf containing the packet to be sent (will be freed by
127 * this function or the NIC driver)
128 * ifp The interface to send on
129 * dst The destination ethernet address (source address will be looked
130 * up using ifp)
131 * etype The ETHERTYPE_* value for the protocol that is being sent
132 *
133 * Returns:
134 * int see errno.h, 0 for success
135 */
136 int
debugnet_ether_output(struct mbuf * m,struct ifnet * ifp,struct ether_addr dst,u_short etype)137 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
138 u_short etype)
139 {
140 struct ether_header *eh;
141
142 if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
143 (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
144 if_printf(ifp, "%s: interface isn't up\n", __func__);
145 m_freem(m);
146 return (ENETDOWN);
147 }
148
149 /* Fill in the ethernet header. */
150 M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
151 if (m == NULL) {
152 printf("%s: out of mbufs\n", __func__);
153 return (ENOBUFS);
154 }
155 eh = mtod(m, struct ether_header *);
156 memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
157 memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
158 eh->ether_type = htons(etype);
159 return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
160 }
161
162 /*
163 * Unreliable transmission of an mbuf chain to the debugnet server
164 * Note: can't handle fragmentation; fails if the packet is larger than
165 * ifp->if_mtu after adding the UDP/IP headers
166 *
167 * Parameters:
168 * pcb The debugnet context block
169 * m mbuf chain
170 *
171 * Returns:
172 * int see errno.h, 0 for success
173 */
174 static int
debugnet_udp_output(struct debugnet_pcb * pcb,struct mbuf * m)175 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
176 {
177 struct udphdr *udp;
178
179 MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
180
181 M_PREPEND(m, sizeof(*udp), M_NOWAIT);
182 if (m == NULL) {
183 printf("%s: out of mbufs\n", __func__);
184 return (ENOBUFS);
185 }
186
187 udp = mtod(m, void *);
188 udp->uh_ulen = htons(m->m_pkthdr.len);
189 /* Use this src port so that the server can connect() the socket */
190 udp->uh_sport = htons(pcb->dp_client_port);
191 udp->uh_dport = htons(pcb->dp_server_port);
192 /* Computed later (protocol-dependent). */
193 udp->uh_sum = 0;
194
195 return (debugnet_ip_output(pcb, m));
196 }
197
198 int
debugnet_ack_output(struct debugnet_pcb * pcb,uint32_t seqno)199 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
200 {
201 struct debugnet_ack *dn_ack;
202 struct mbuf *m;
203
204 DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
205
206 m = m_gethdr(M_NOWAIT, MT_DATA);
207 if (m == NULL) {
208 printf("%s: Out of mbufs\n", __func__);
209 return (ENOBUFS);
210 }
211 m->m_len = sizeof(*dn_ack);
212 m->m_pkthdr.len = sizeof(*dn_ack);
213 MH_ALIGN(m, sizeof(*dn_ack));
214 dn_ack = mtod(m, void *);
215 dn_ack->da_seqno = seqno;
216
217 return (debugnet_udp_output(pcb, m));
218 }
219
/*
 * Dummy free function for debugnet clusters.
 *
 * debugnet_send() attaches the caller's data buffer to an mbuf as external
 * storage with MEXTADD() and registers this routine as its free callback.
 * The body is intentionally empty: nothing is released here.
 */
static void
debugnet_mbuf_free(struct mbuf *m __unused)
{
}
227
228 /*
229 * Construct and reliably send a debugnet packet. May fail from a resource
230 * shortage or extreme number of unacknowledged retransmissions. Wait for
231 * an acknowledgement before returning. Splits packets into chunks small
232 * enough to be sent without fragmentation (looks up the interface MTU)
233 *
234 * Parameters:
235 * type debugnet packet type (HERALD, FINISHED, ...)
236 * data data
237 * datalen data size (bytes)
238 * auxdata optional auxiliary information
239 *
240 * Returns:
241 * int see errno.h, 0 for success
242 */
243 int
debugnet_send(struct debugnet_pcb * pcb,uint32_t type,const void * data,uint32_t datalen,const struct debugnet_proto_aux * auxdata)244 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
245 uint32_t datalen, const struct debugnet_proto_aux *auxdata)
246 {
247 struct debugnet_msg_hdr *dn_msg_hdr;
248 struct mbuf *m, *m2;
249 uint64_t want_acks;
250 uint32_t i, pktlen, sent_so_far;
251 int retries, polls, error;
252
253 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
254 return (ECONNRESET);
255
256 want_acks = 0;
257 pcb->dp_rcvd_acks = 0;
258 retries = 0;
259
260 retransmit:
261 /* Chunks can be too big to fit in packets. */
262 for (i = sent_so_far = 0; sent_so_far < datalen ||
263 (i == 0 && datalen == 0); i++) {
264 pktlen = datalen - sent_so_far;
265
266 /* Bound: the interface MTU (assume no IP options). */
267 pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
268 sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
269
270 /*
271 * Check if it is retransmitting and this has been ACKed
272 * already.
273 */
274 if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
275 sent_so_far += pktlen;
276 continue;
277 }
278
279 /*
280 * Get and fill a header mbuf, then chain data as an extended
281 * mbuf.
282 */
283 m = m_gethdr(M_NOWAIT, MT_DATA);
284 if (m == NULL) {
285 printf("%s: Out of mbufs\n", __func__);
286 return (ENOBUFS);
287 }
288 m->m_len = sizeof(struct debugnet_msg_hdr);
289 m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
290 MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
291 dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
292 dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
293 dn_msg_hdr->mh_type = htonl(type);
294 dn_msg_hdr->mh_len = htonl(pktlen);
295
296 if (auxdata != NULL) {
297 dn_msg_hdr->mh_offset =
298 htobe64(auxdata->dp_offset_start + sent_so_far);
299 dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
300 } else {
301 dn_msg_hdr->mh_offset = htobe64(sent_so_far);
302 dn_msg_hdr->mh_aux2 = 0;
303 }
304
305 if (pktlen != 0) {
306 m2 = m_get(M_NOWAIT, MT_DATA);
307 if (m2 == NULL) {
308 m_freem(m);
309 printf("%s: Out of mbufs\n", __func__);
310 return (ENOBUFS);
311 }
312 MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
313 pktlen, debugnet_mbuf_free, NULL, NULL, 0,
314 EXT_DISPOSABLE);
315 m2->m_len = pktlen;
316
317 m_cat(m, m2);
318 m->m_pkthdr.len += pktlen;
319 }
320 error = debugnet_udp_output(pcb, m);
321 if (error != 0)
322 return (error);
323
324 /* Note that we're waiting for this packet in the bitfield. */
325 want_acks |= (1 << i);
326 sent_so_far += pktlen;
327 }
328 if (i >= DEBUGNET_MAX_IN_FLIGHT)
329 printf("Warning: Sent more than %d packets (%d). "
330 "Acknowledgements will fail unless the size of "
331 "rcvd_acks/want_acks is increased.\n",
332 DEBUGNET_MAX_IN_FLIGHT, i);
333
334 /*
335 * Wait for acks. A *real* window would speed things up considerably.
336 */
337 polls = 0;
338 while (pcb->dp_rcvd_acks != want_acks) {
339 if (polls++ > debugnet_npolls) {
340 if (retries++ > debugnet_nretries)
341 return (ETIMEDOUT);
342 printf(". ");
343 goto retransmit;
344 }
345 debugnet_network_poll(pcb);
346 DELAY(500);
347 if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
348 return (ECONNRESET);
349 }
350 pcb->dp_seqno += i;
351 return (0);
352 }
353
354 /*
355 * Network input primitives.
356 */
357
358 /*
359 * Just introspect the header enough to fire off a seqno ack and validate
360 * length fits.
361 */
362 static void
debugnet_handle_rx_msg(struct debugnet_pcb * pcb,struct mbuf ** mb)363 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
364 {
365 const struct debugnet_msg_hdr *dnh;
366 struct mbuf *m;
367 int error;
368
369 m = *mb;
370
371 if (m->m_pkthdr.len < sizeof(*dnh)) {
372 DNETDEBUG("ignoring small debugnet_msg packet\n");
373 return;
374 }
375
376 /* Get ND header. */
377 if (m->m_len < sizeof(*dnh)) {
378 m = m_pullup(m, sizeof(*dnh));
379 *mb = m;
380 if (m == NULL) {
381 DNETDEBUG("m_pullup failed\n");
382 return;
383 }
384 }
385 dnh = mtod(m, const void *);
386
387 if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
388 DNETDEBUG("Dropping short packet.\n");
389 return;
390 }
391
392 /*
393 * If the issue is transient (ENOBUFS), sender should resend. If
394 * non-transient (like driver objecting to rx -> tx from the same
395 * thread), not much else we can do.
396 */
397 error = debugnet_ack_output(pcb, dnh->mh_seqno);
398 if (error != 0)
399 return;
400
401 if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) {
402 printf("Remote shut down the connection on us!\n");
403 pcb->dp_state = DN_STATE_REMOTE_CLOSED;
404
405 /*
406 * Continue through to the user handler so they are signalled
407 * not to wait for further rx.
408 */
409 }
410
411 pcb->dp_rx_handler(pcb, mb);
412 }
413
414 static void
debugnet_handle_ack(struct debugnet_pcb * pcb,struct mbuf ** mb,uint16_t sport)415 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
416 {
417 const struct debugnet_ack *dn_ack;
418 struct mbuf *m;
419 uint32_t rcv_ackno;
420
421 m = *mb;
422
423 /* Get Ack. */
424 if (m->m_len < sizeof(*dn_ack)) {
425 m = m_pullup(m, sizeof(*dn_ack));
426 *mb = m;
427 if (m == NULL) {
428 DNETDEBUG("m_pullup failed\n");
429 return;
430 }
431 }
432 dn_ack = mtod(m, const void *);
433
434 /* Debugnet processing. */
435 /*
436 * Packet is meant for us. Extract the ack sequence number and the
437 * port number if necessary.
438 */
439 rcv_ackno = ntohl(dn_ack->da_seqno);
440 if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
441 pcb->dp_server_port = sport;
442 pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
443 }
444 if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
445 printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
446 else if (rcv_ackno >= pcb->dp_seqno) {
447 /* We're interested in this ack. Record it. */
448 pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
449 }
450 }
451
452 void
debugnet_handle_udp(struct debugnet_pcb * pcb,struct mbuf ** mb)453 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
454 {
455 const struct udphdr *udp;
456 struct mbuf *m;
457 uint16_t sport, ulen;
458
459 /* UDP processing. */
460
461 m = *mb;
462 if (m->m_pkthdr.len < sizeof(*udp)) {
463 DNETDEBUG("ignoring small UDP packet\n");
464 return;
465 }
466
467 /* Get UDP headers. */
468 if (m->m_len < sizeof(*udp)) {
469 m = m_pullup(m, sizeof(*udp));
470 *mb = m;
471 if (m == NULL) {
472 DNETDEBUG("m_pullup failed\n");
473 return;
474 }
475 }
476 udp = mtod(m, const void *);
477
478 /* We expect to receive UDP packets on the configured client port. */
479 if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
480 DNETDEBUG("not on the expected port.\n");
481 return;
482 }
483
484 /* Check that ulen does not exceed actual size of data. */
485 ulen = ntohs(udp->uh_ulen);
486 if (m->m_pkthdr.len < ulen) {
487 DNETDEBUG("ignoring runt UDP packet\n");
488 return;
489 }
490
491 sport = ntohs(udp->uh_sport);
492
493 m_adj(m, sizeof(*udp));
494 ulen -= sizeof(*udp);
495
496 if (ulen == sizeof(struct debugnet_ack)) {
497 debugnet_handle_ack(pcb, mb, sport);
498 return;
499 }
500
501 if (pcb->dp_rx_handler == NULL) {
502 if (ulen < sizeof(struct debugnet_ack))
503 DNETDEBUG("ignoring small ACK packet\n");
504 else
505 DNETDEBUG("ignoring unexpected non-ACK packet on "
506 "half-duplex connection.\n");
507 return;
508 }
509
510 debugnet_handle_rx_msg(pcb, mb);
511 }
512
513 /*
514 * Handler for incoming packets directly from the network adapter
515 * Identifies the packet type (IP or ARP) and passes it along to one of the
516 * helper functions debugnet_handle_ip or debugnet_handle_arp.
517 *
518 * It needs to partially replicate the behaviour of ether_input() and
519 * ether_demux().
520 *
521 * Parameters:
522 * ifp the interface the packet came from
523 * m an mbuf containing the packet received
524 */
525 static void
debugnet_pkt_in(struct ifnet * ifp,struct mbuf * m)526 debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m)
527 {
528 struct ifreq ifr;
529 struct ether_header *eh;
530 u_short etype;
531
532 /* Ethernet processing. */
533 if ((m->m_flags & M_PKTHDR) == 0) {
534 DNETDEBUG_IF(ifp, "discard frame without packet header\n");
535 goto done;
536 }
537 if (m->m_len < ETHER_HDR_LEN) {
538 DNETDEBUG_IF(ifp,
539 "discard frame without leading eth header (len %u pktlen %u)\n",
540 m->m_len, m->m_pkthdr.len);
541 goto done;
542 }
543 if ((m->m_flags & M_HASFCS) != 0) {
544 m_adj(m, -ETHER_CRC_LEN);
545 m->m_flags &= ~M_HASFCS;
546 }
547 eh = mtod(m, struct ether_header *);
548 etype = ntohs(eh->ether_type);
549 if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
550 DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
551 goto done;
552 }
553 if (if_gethwaddr(ifp, &ifr) != 0) {
554 DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
555 goto done;
556 }
557 if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
558 ETHER_ADDR_LEN) != 0 &&
559 (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
560 DNETDEBUG_IF(ifp,
561 "discard frame with incorrect destination addr\n");
562 goto done;
563 }
564
565 MPASS(g_debugnet_pcb_inuse);
566
567 /* Done ethernet processing. Strip off the ethernet header. */
568 m_adj(m, ETHER_HDR_LEN);
569 switch (etype) {
570 case ETHERTYPE_ARP:
571 debugnet_handle_arp(&g_dnet_pcb, &m);
572 break;
573 case ETHERTYPE_IP:
574 debugnet_handle_ip(&g_dnet_pcb, &m);
575 break;
576 default:
577 DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
578 break;
579 }
580 done:
581 if (m != NULL)
582 m_freem(m);
583 }
584
585 /*
586 * Network polling primitive.
587 *
588 * Instead of assuming that most of the network stack is sane, we just poll the
589 * driver directly for packets.
590 */
591 void
debugnet_network_poll(struct debugnet_pcb * pcb)592 debugnet_network_poll(struct debugnet_pcb *pcb)
593 {
594 struct ifnet *ifp;
595
596 ifp = pcb->dp_ifp;
597 ifp->if_debugnet_methods->dn_poll(ifp, 1000);
598 }
599
600 /*
601 * Start of consumer API surface.
602 */
603 void
debugnet_free(struct debugnet_pcb * pcb)604 debugnet_free(struct debugnet_pcb *pcb)
605 {
606 struct ifnet *ifp;
607
608 MPASS(g_debugnet_pcb_inuse);
609 MPASS(pcb == &g_dnet_pcb);
610
611 ifp = pcb->dp_ifp;
612 if (ifp != NULL) {
613 if (pcb->dp_drv_input != NULL)
614 ifp->if_input = pcb->dp_drv_input;
615 if (pcb->dp_event_started)
616 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
617 }
618 debugnet_mbuf_finish();
619
620 g_debugnet_pcb_inuse = false;
621 memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
622 }
623
624 int
debugnet_connect(const struct debugnet_conn_params * dcp,struct debugnet_pcb ** pcb_out)625 debugnet_connect(const struct debugnet_conn_params *dcp,
626 struct debugnet_pcb **pcb_out)
627 {
628 struct debugnet_proto_aux herald_auxdata;
629 struct debugnet_pcb *pcb;
630 struct ifnet *ifp;
631 int error;
632
633 if (g_debugnet_pcb_inuse) {
634 printf("%s: Only one connection at a time.\n", __func__);
635 return (EBUSY);
636 }
637
638 pcb = &g_dnet_pcb;
639 *pcb = (struct debugnet_pcb) {
640 .dp_state = DN_STATE_INIT,
641 .dp_client = dcp->dc_client,
642 .dp_server = dcp->dc_server,
643 .dp_gateway = dcp->dc_gateway,
644 .dp_server_port = dcp->dc_herald_port, /* Initially */
645 .dp_client_port = dcp->dc_client_port,
646 .dp_seqno = 1,
647 .dp_ifp = dcp->dc_ifp,
648 .dp_rx_handler = dcp->dc_rx_handler,
649 };
650
651 /* Switch to the debugnet mbuf zones. */
652 debugnet_mbuf_start();
653
654 /* At least one needed parameter is missing; infer it. */
655 if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
656 pcb->dp_ifp == NULL) {
657 struct sockaddr_in dest_sin, *gw_sin, *local_sin;
658 struct ifnet *rt_ifp;
659 struct nhop_object *nh;
660
661 memset(&dest_sin, 0, sizeof(dest_sin));
662 dest_sin = (struct sockaddr_in) {
663 .sin_len = sizeof(dest_sin),
664 .sin_family = AF_INET,
665 .sin_addr.s_addr = pcb->dp_server,
666 };
667
668 CURVNET_SET(vnet0);
669 nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0,
670 NHR_NONE);
671 CURVNET_RESTORE();
672
673 if (nh == NULL) {
674 printf("%s: Could not get route for that server.\n",
675 __func__);
676 error = ENOENT;
677 goto cleanup;
678 }
679
680 /* TODO support AF_INET6 */
681 if (nh->gw_sa.sa_family == AF_INET)
682 gw_sin = &nh->gw4_sa;
683 else {
684 if (nh->gw_sa.sa_family == AF_LINK)
685 DNETDEBUG("Destination address is on link.\n");
686 gw_sin = NULL;
687 }
688
689 MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET);
690 local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
691
692 rt_ifp = nh->nh_ifp;
693
694 if (pcb->dp_client == INADDR_ANY)
695 pcb->dp_client = local_sin->sin_addr.s_addr;
696 if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
697 pcb->dp_gateway = gw_sin->sin_addr.s_addr;
698 if (pcb->dp_ifp == NULL)
699 pcb->dp_ifp = rt_ifp;
700 }
701
702 ifp = pcb->dp_ifp;
703
704 if (debugnet_debug > 0) {
705 char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
706 gwbuf[INET_ADDRSTRLEN];
707 inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
708 inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
709 if (pcb->dp_gateway != INADDR_ANY)
710 inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
711 DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
712 serbuf, pcb->dp_server_port,
713 (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
714 (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
715 clibuf, pcb->dp_client_port, if_name(ifp));
716 }
717
718 /* Validate iface is online and supported. */
719 if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
720 printf("%s: interface '%s' does not support debugnet\n",
721 __func__, if_name(ifp));
722 error = ENODEV;
723 goto cleanup;
724 }
725 if ((if_getflags(ifp) & IFF_UP) == 0) {
726 printf("%s: interface '%s' link is down\n", __func__,
727 if_name(ifp));
728 error = ENXIO;
729 goto cleanup;
730 }
731
732 ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
733 pcb->dp_event_started = true;
734
735 /*
736 * We maintain the invariant that g_debugnet_pcb_inuse is always true
737 * while the debugnet ifp's if_input is overridden with
738 * debugnet_pkt_in.
739 */
740 g_debugnet_pcb_inuse = true;
741
742 /* Make the card use *our* receive callback. */
743 pcb->dp_drv_input = ifp->if_input;
744 ifp->if_input = debugnet_pkt_in;
745
746 printf("%s: searching for %s MAC...\n", __func__,
747 (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
748
749 error = debugnet_arp_gw(pcb);
750 if (error != 0) {
751 printf("%s: failed to locate MAC address\n", __func__);
752 goto cleanup;
753 }
754 MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
755
756 herald_auxdata = (struct debugnet_proto_aux) {
757 .dp_offset_start = dcp->dc_herald_offset,
758 .dp_aux2 = dcp->dc_herald_aux2,
759 };
760 error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
761 dcp->dc_herald_datalen, &herald_auxdata);
762 if (error != 0) {
763 printf("%s: failed to herald debugnet server\n", __func__);
764 goto cleanup;
765 }
766
767 *pcb_out = pcb;
768 return (0);
769
770 cleanup:
771 debugnet_free(pcb);
772 return (error);
773 }
774
/*
 * Pre-allocated dump-time mbuf tracking.
 *
 * We just track the high water mark we've ever seen and allocate appropriately
 * for that iface/mtu combo.
 */
static struct {
	int nmbuf;	/* largest mbuf count requested so far */
	int ncl;	/* largest cluster count requested so far */
	int clsize;	/* largest cluster size requested so far */
} dn_hwm;
/* Protects dn_hwm; taken in dn_maybe_reinit_mbufs(). */
static struct mtx dn_hwm_lk;
MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
788
789 static void
dn_maybe_reinit_mbufs(int nmbuf,int ncl,int clsize)790 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
791 {
792 bool any;
793
794 any = false;
795 mtx_lock(&dn_hwm_lk);
796
797 if (nmbuf > dn_hwm.nmbuf) {
798 any = true;
799 dn_hwm.nmbuf = nmbuf;
800 } else
801 nmbuf = dn_hwm.nmbuf;
802
803 if (ncl > dn_hwm.ncl) {
804 any = true;
805 dn_hwm.ncl = ncl;
806 } else
807 ncl = dn_hwm.ncl;
808
809 if (clsize > dn_hwm.clsize) {
810 any = true;
811 dn_hwm.clsize = clsize;
812 } else
813 clsize = dn_hwm.clsize;
814
815 mtx_unlock(&dn_hwm_lk);
816
817 if (any)
818 debugnet_mbuf_reinit(nmbuf, ncl, clsize);
819 }
820
821 void
debugnet_any_ifnet_update(struct ifnet * ifp)822 debugnet_any_ifnet_update(struct ifnet *ifp)
823 {
824 int clsize, nmbuf, ncl, nrxr;
825
826 if (!DEBUGNET_SUPPORTED_NIC(ifp))
827 return;
828
829 ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
830 KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
831
832 /*
833 * We need two headers per message on the transmit side. Multiply by
834 * four to give us some breathing room.
835 */
836 nmbuf = ncl * (4 + nrxr);
837 ncl *= nrxr;
838
839 /*
840 * Bandaid for drivers that (incorrectly) advertise LinkUp before their
841 * dn_init method is available.
842 */
843 if (nmbuf == 0 || ncl == 0 || clsize == 0) {
844 printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n",
845 __func__, if_name(ifp), ifp);
846 return;
847 }
848 dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
849 }
850
851 /*
852 * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
853 * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
854 *
855 * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
856 * because the driver is still in attach. Since we cannot use down interfaces,
857 * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least
858 * with vtnet and dhcpclient that event just never occurs.
859 *
860 * So that's how I've landed on the lower level ifnet_link_event.
861 */
862
863 static void
dn_ifnet_event(void * arg __unused,struct ifnet * ifp,int link_state)864 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
865 {
866 if (link_state == LINK_STATE_UP)
867 debugnet_any_ifnet_update(ifp);
868 }
869
/* Tag returned when dn_ifnet_event is registered for ifnet_link_event. */
static eventhandler_tag dn_attach_cookie;

/*
 * SYSINIT hook: register dn_ifnet_event as an ifnet_link_event handler so
 * that link-up transitions trigger debugnet_any_ifnet_update().
 */
static void
dn_evh_init(void *ctx __unused)
{
	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
}
SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
878
879 /*
880 * DDB parsing helpers for debugnet(4) consumers.
881 */
882 #ifdef DDB
/*
 * Parse state for one IPv4-valued option of the DDB command line (client,
 * server, or gateway).
 */
struct my_inet_opt {
	bool has_opt;		/* set once the option was parsed successfully */
	const char *printname;	/* option name used in diagnostics */
	in_addr_t *result;	/* where the parsed address is stored */
};
888
889 static int
dn_parse_optarg_ipv4(struct my_inet_opt * opt)890 dn_parse_optarg_ipv4(struct my_inet_opt *opt)
891 {
892 in_addr_t tmp;
893 unsigned octet;
894 int t;
895
896 tmp = 0;
897 for (octet = 0; octet < 4; octet++) {
898 t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
899 if (t != tNUMBER) {
900 db_printf("%s:%s: octet %u expected number; found %d\n",
901 __func__, opt->printname, octet, t);
902 return (EINVAL);
903 }
904 /*
905 * db_lex lexes '-' distinctly from the number itself, but
906 * let's document that invariant.
907 */
908 MPASS(db_tok_number >= 0);
909
910 if (db_tok_number > UINT8_MAX) {
911 db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
912 opt->printname, octet, (intmax_t)db_tok_number);
913 return (EDOM);
914 }
915
916 /* Constructed host-endian and converted to network later. */
917 tmp = (tmp << 8) | db_tok_number;
918
919 if (octet < 3) {
920 t = db_read_token_flags(DRT_WSPACE);
921 if (t != tDOT) {
922 db_printf("%s:%s: octet %u expected '.'; found"
923 " %d\n", __func__, opt->printname, octet,
924 t);
925 return (EINVAL);
926 }
927 }
928 }
929
930 *opt->result = htonl(tmp);
931 opt->has_opt = true;
932 return (0);
933 }
934
935 int
debugnet_parse_ddb_cmd(const char * cmd,struct debugnet_ddb_config * result)936 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
937 {
938 struct ifnet *ifp;
939 int t, error;
940 bool want_ifp;
941 char ch;
942
943 struct my_inet_opt opt_client = {
944 .printname = "client",
945 .result = &result->dd_client,
946 },
947 opt_server = {
948 .printname = "server",
949 .result = &result->dd_server,
950 },
951 opt_gateway = {
952 .printname = "gateway",
953 .result = &result->dd_gateway,
954 },
955 *cur_inet_opt;
956
957 ifp = NULL;
958 memset(result, 0, sizeof(*result));
959
960 /*
961 * command [space] [-] [opt] [[space] [optarg]] ...
962 *
963 * db_command has already lexed 'command' for us.
964 */
965 t = db_read_token_flags(DRT_WSPACE);
966 if (t == tWSPACE)
967 t = db_read_token_flags(DRT_WSPACE);
968
969 while (t != tEOL) {
970 if (t != tMINUS) {
971 db_printf("%s: Bad syntax; expected '-', got %d\n",
972 cmd, t);
973 goto usage;
974 }
975
976 t = db_read_token_flags(DRT_WSPACE);
977 if (t != tIDENT) {
978 db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
979 cmd, t);
980 goto usage;
981 }
982
983 if (strlen(db_tok_string) > 1) {
984 db_printf("%s: Bad syntax; expected single option "
985 "flag, got '%s'\n", cmd, db_tok_string);
986 goto usage;
987 }
988
989 want_ifp = false;
990 cur_inet_opt = NULL;
991 switch ((ch = db_tok_string[0])) {
992 default:
993 DNETDEBUG("Unexpected: '%c'\n", ch);
994 /* FALLTHROUGH */
995 case 'h':
996 goto usage;
997 case 'c':
998 cur_inet_opt = &opt_client;
999 break;
1000 case 'g':
1001 cur_inet_opt = &opt_gateway;
1002 break;
1003 case 's':
1004 cur_inet_opt = &opt_server;
1005 break;
1006 case 'i':
1007 want_ifp = true;
1008 break;
1009 }
1010
1011 t = db_read_token_flags(DRT_WSPACE);
1012 if (t != tWSPACE) {
1013 db_printf("%s: Bad syntax; expected space after "
1014 "flag %c, got %d\n", cmd, ch, t);
1015 goto usage;
1016 }
1017
1018 if (want_ifp) {
1019 t = db_read_token_flags(DRT_WSPACE);
1020 if (t != tIDENT) {
1021 db_printf("%s: Expected interface but got %d\n",
1022 cmd, t);
1023 goto usage;
1024 }
1025
1026 CURVNET_SET(vnet0);
1027 /*
1028 * We *don't* take a ref here because the only current
1029 * consumer, db_netdump_cmd, does not need it. It
1030 * (somewhat redundantly) extracts the if_name(),
1031 * re-lookups the ifp, and takes its own reference.
1032 */
1033 ifp = ifunit(db_tok_string);
1034 CURVNET_RESTORE();
1035 if (ifp == NULL) {
1036 db_printf("Could not locate interface %s\n",
1037 db_tok_string);
1038 goto cleanup;
1039 }
1040 } else {
1041 MPASS(cur_inet_opt != NULL);
1042 /* Assume IPv4 for now. */
1043 error = dn_parse_optarg_ipv4(cur_inet_opt);
1044 if (error != 0)
1045 goto cleanup;
1046 }
1047
1048 /* Skip (mandatory) whitespace after option, if not EOL. */
1049 t = db_read_token_flags(DRT_WSPACE);
1050 if (t == tEOL)
1051 break;
1052 if (t != tWSPACE) {
1053 db_printf("%s: Bad syntax; expected space after "
1054 "flag %c option; got %d\n", cmd, ch, t);
1055 goto usage;
1056 }
1057 t = db_read_token_flags(DRT_WSPACE);
1058 }
1059
1060 if (!opt_server.has_opt) {
1061 db_printf("%s: need a destination server address\n", cmd);
1062 goto usage;
1063 }
1064
1065 result->dd_has_client = opt_client.has_opt;
1066 result->dd_has_gateway = opt_gateway.has_opt;
1067 result->dd_ifp = ifp;
1068
1069 /* We parsed the full line to tEOL already, or bailed with an error. */
1070 return (0);
1071
1072 usage:
1073 db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
1074 "-i <interface>]\n", cmd);
1075 error = EINVAL;
1076 /* FALLTHROUGH */
1077 cleanup:
1078 db_skip_to_eol();
1079 return (error);
1080 }
1081 #endif /* DDB */
1082