1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5 * The Regents of the University of California.
6 * Copyright (c) 2008 Robert N. M. Watson
7 * Copyright (c) 2010-2011 Juniper Networks, Inc.
8 * Copyright (c) 2014 Kevin Lo
9 * All rights reserved.
10 *
11 * Portions of this software were developed by Robert N. M. Watson under
12 * contract to Juniper Networks, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
39 */
40
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46 #include "opt_ipsec.h"
47 #include "opt_route.h"
48 #include "opt_rss.h"
49
50 #include <sys/param.h>
51 #include <sys/domain.h>
52 #include <sys/eventhandler.h>
53 #include <sys/jail.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/priv.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/sdt.h>
62 #include <sys/signalvar.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/sx.h>
66 #include <sys/sysctl.h>
67 #include <sys/syslog.h>
68 #include <sys/systm.h>
69
70 #include <vm/uma.h>
71
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/route.h>
75 #include <net/route/nhop.h>
76 #include <net/rss_config.h>
77
78 #include <netinet/in.h>
79 #include <netinet/in_kdtrace.h>
80 #include <netinet/in_fib.h>
81 #include <netinet/in_pcb.h>
82 #include <netinet/in_systm.h>
83 #include <netinet/in_var.h>
84 #include <netinet/ip.h>
85 #ifdef INET6
86 #include <netinet/ip6.h>
87 #endif
88 #include <netinet/ip_icmp.h>
89 #include <netinet/icmp_var.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/ip_options.h>
92 #ifdef INET6
93 #include <netinet6/ip6_var.h>
94 #endif
95 #include <netinet/udp.h>
96 #include <netinet/udp_var.h>
97 #include <netinet/udplite.h>
98 #include <netinet/in_rss.h>
99
100 #include <netipsec/ipsec_support.h>
101
102 #include <machine/in_cksum.h>
103
104 #include <security/mac/mac_framework.h>
105
106 /*
107 * UDP and UDP-Lite protocols implementation.
108 * Per RFC 768, August, 1980.
109 * Per RFC 3828, July, 2004.
110 */
111
112 /*
113 * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums
114 * removes the only data integrity mechanism for packets and malformed
115 * packets that would otherwise be discarded due to bad checksums, and may
116 * cause problems (especially for NFS data blocks).
117 */
/* Per-VNET switch exported read/write as the "checksum" sysctl; defaults to 1. */
118 VNET_DEFINE(int, udp_cksum) = 1;
119 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
120 &VNET_NAME(udp_cksum), 0, "compute udp checksum");
121
/*
 * Per-VNET switch, default 0.  When non-zero, udp_input() logs every datagram
 * for which no matching PCB was found.
 */
122 VNET_DEFINE(int, udp_log_in_vain) = 0;
123 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
124 &VNET_NAME(udp_log_in_vain), 0, "Log all incoming UDP packets");
125
/*
 * Per-VNET switch, default 0.  When non-zero, udp_input() drops datagrams for
 * unbound ports silently instead of answering with an ICMP port unreachable.
 */
126 VNET_DEFINE(int, udp_blackhole) = 0;
127 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
128 &VNET_NAME(udp_blackhole), 0,
129 "Do not send port unreachables for refused connects");
130
/* Global (not per-VNET) tunable exported as the "maxdgram" sysctl. */
131 u_long udp_sendspace = 9216; /* really max datagram size */
132 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
133 &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
134
/*
 * Global tunable exported as the "recvspace" sysctl.  The default is sized
 * for 40 datagrams of 1 KiB, each accompanied by one source sockaddr; the
 * larger sockaddr_in6 is used when INET6 is compiled in.
 */
135 u_long udp_recvspace = 40 * (1024 +
136 #ifdef INET6
137 sizeof(struct sockaddr_in6)
138 #else
139 sizeof(struct sockaddr_in)
140 #endif
141 ); /* 40 1K datagrams */
142
143 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
144 &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
145
/*
 * Per-VNET PCB list heads and pcbinfo structures for UDP (udb/udbinfo) and
 * UDP-Lite (ulitecb/ulitecbinfo), plus the UMA zone that udp_newudpcb()
 * allocates struct udpcb from.
 */
146 VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
147 VNET_DEFINE(struct inpcbinfo, udbinfo);
148 VNET_DEFINE(struct inpcbhead, ulitecb);
149 VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
150 VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
151 #define V_udpcb_zone VNET(udpcb_zone)
152
/*
 * Value passed twice to in_pcbinfo_init() by udp_init() and udplite_init();
 * may be overridden at build time.
 */
153 #ifndef UDBHASHSIZE
154 #define UDBHASHSIZE 128
155 #endif
156
/* Per-VNET UDP statistics, exported through the "stats" sysctl. */
157 VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
158 VNET_PCPUSTAT_SYSINIT(udpstat);
159 SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
160 udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
161
162 #ifdef VIMAGE
163 VNET_PCPUSTAT_SYSUNINIT(udpstat);
164 #endif /* VIMAGE */
/* Forward declarations of the IPv4-only entry points defined later in the file. */
165 #ifdef INET
166 static void udp_detach(struct socket *so);
167 static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
168 struct mbuf *, struct thread *, int);
169 #endif
170
171 static void
udp_zone_change(void * tag)172 udp_zone_change(void *tag)
173 {
174
175 uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
176 uma_zone_set_max(V_udpcb_zone, maxsockets);
177 }
178
/*
 * Item initializer handed to in_pcbinfo_init() for the UDP inpcb zone.
 * Initializes the lock of the inpcb located at mem (lock type "udpinp").
 * The size and flags arguments are not used.  Always returns 0.
 */
static int
udp_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *pcb = mem;

	INP_LOCK_INIT(pcb, "inp", "udpinp");
	return (0);
}
188
/*
 * Item initializer handed to in_pcbinfo_init() for the UDP-Lite inpcb zone.
 * Initializes the lock of the inpcb located at mem (lock type "udpliteinp").
 * The size and flags arguments are not used.  Always returns 0.
 */
static int
udplite_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *pcb = mem;

	INP_LOCK_INIT(pcb, "inp", "udpliteinp");
	return (0);
}
198
199 void
udp_init(void)200 udp_init(void)
201 {
202
203 /*
204 * For now default to 2-tuple UDP hashing - until the fragment
205 * reassembly code can also update the flowid.
206 *
207 * Once we can calculate the flowid that way and re-establish
208 * a 4-tuple, flip this to 4-tuple.
209 */
210 in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
211 "udp_inpcb", udp_inpcb_init, IPI_HASHFIELDS_2TUPLE);
212 V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
213 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
214 uma_zone_set_max(V_udpcb_zone, maxsockets);
215 uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
216 EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
217 EVENTHANDLER_PRI_ANY);
218 }
219
220 void
udplite_init(void)221 udplite_init(void)
222 {
223
224 in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
225 UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init,
226 IPI_HASHFIELDS_2TUPLE);
227 }
228
229 /*
230 * Kernel module interface for updating udpstat. The argument is an index
231 * into udpstat treated as an array of u_long. While this encodes the
232 * general layout of udpstat into the caller, it doesn't encode its location,
233 * so that future changes to add, for example, per-CPU stats support won't
234 * cause binary compatibility problems for kernel modules.
235 */
236 void
kmod_udpstat_inc(int statnum)237 kmod_udpstat_inc(int statnum)
238 {
239
240 counter_u64_add(VNET(udpstat)[statnum], 1);
241 }
242
243 int
udp_newudpcb(struct inpcb * inp)244 udp_newudpcb(struct inpcb *inp)
245 {
246 struct udpcb *up;
247
248 up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
249 if (up == NULL)
250 return (ENOBUFS);
251 inp->inp_ppcb = up;
252 return (0);
253 }
254
255 void
udp_discardcb(struct udpcb * up)256 udp_discardcb(struct udpcb *up)
257 {
258
259 uma_zfree(V_udpcb_zone, up);
260 }
261
262 #ifdef VIMAGE
263 static void
udp_destroy(void * unused __unused)264 udp_destroy(void *unused __unused)
265 {
266
267 in_pcbinfo_destroy(&V_udbinfo);
268 uma_zdestroy(V_udpcb_zone);
269 }
270 VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
271
272 static void
udplite_destroy(void * unused __unused)273 udplite_destroy(void *unused __unused)
274 {
275
276 in_pcbinfo_destroy(&V_ulitecbinfo);
277 }
278 VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy,
279 NULL);
280 #endif
281
282 #ifdef INET
283 /*
284 * Subroutine of udp_input(), which appends the provided mbuf chain to the
285 * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
286 * contains the source address. If the socket ends up being an IPv6 socket,
287 * udp_append() will convert to a sockaddr_in6 before passing the address
288 * into the socket code.
289 *
290 * In the normal case udp_append() will return 0, indicating that you
291 * must unlock the inp. However if a tunneling protocol is in place we increment
292 * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
293 * then decrement the reference count. If the inp_rele returns 1, indicating the
294 * inp is gone, we return that to the caller to tell them *not* to unlock
295 * the inp. In the case of multi-cast this will cause the distribution
296 * to stop (though most tunneling protocols known currently do *not* use
297 * multicast).
298 */
299 static int
udp_append(struct inpcb * inp,struct ip * ip,struct mbuf * n,int off,struct sockaddr_in * udp_in)300 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
301 struct sockaddr_in *udp_in)
302 {
303 struct sockaddr *append_sa;
304 struct socket *so;
305 struct mbuf *tmpopts, *opts = NULL;
306 #ifdef INET6
307 struct sockaddr_in6 udp_in6;
308 #endif
309 struct udpcb *up;
310
311 INP_LOCK_ASSERT(inp);
312
313 /*
314 * Engage the tunneling protocol.
315 */
316 up = intoudpcb(inp);
317 if (up->u_tun_func != NULL) {
318 in_pcbref(inp);
319 INP_RUNLOCK(inp);
320 (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0],
321 up->u_tun_ctx);
322 INP_RLOCK(inp);
323 return (in_pcbrele_rlocked(inp));
324 }
325
326 off += sizeof(struct udphdr);
327
328 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
329 /* Check AH/ESP integrity. */
330 if (IPSEC_ENABLED(ipv4) &&
331 IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) {
332 m_freem(n);
333 return (0);
334 }
335 if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */
336 if (IPSEC_ENABLED(ipv4) &&
337 UDPENCAP_INPUT(n, off, AF_INET) != 0)
338 return (0); /* Consumed. */
339 }
340 #endif /* IPSEC */
341 #ifdef MAC
342 if (mac_inpcb_check_deliver(inp, n) != 0) {
343 m_freem(n);
344 return (0);
345 }
346 #endif /* MAC */
347 if (inp->inp_flags & INP_CONTROLOPTS ||
348 inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
349 #ifdef INET6
350 if (inp->inp_vflag & INP_IPV6)
351 (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
352 else
353 #endif /* INET6 */
354 ip_savecontrol(inp, &opts, ip, n);
355 }
356 if ((inp->inp_vflag & INP_IPV4) && (inp->inp_flags2 & INP_ORIGDSTADDR)) {
357 tmpopts = sbcreatecontrol((caddr_t)&udp_in[1],
358 sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP);
359 if (tmpopts) {
360 if (opts) {
361 tmpopts->m_next = opts;
362 opts = tmpopts;
363 } else
364 opts = tmpopts;
365 }
366 }
367 #ifdef INET6
368 if (inp->inp_vflag & INP_IPV6) {
369 bzero(&udp_in6, sizeof(udp_in6));
370 udp_in6.sin6_len = sizeof(udp_in6);
371 udp_in6.sin6_family = AF_INET6;
372 in6_sin_2_v4mapsin6(&udp_in[0], &udp_in6);
373 append_sa = (struct sockaddr *)&udp_in6;
374 } else
375 #endif /* INET6 */
376 append_sa = (struct sockaddr *)&udp_in[0];
377 m_adj(n, off);
378
379 so = inp->inp_socket;
380 SOCKBUF_LOCK(&so->so_rcv);
381 if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
382 SOCKBUF_UNLOCK(&so->so_rcv);
383 m_freem(n);
384 if (opts)
385 m_freem(opts);
386 UDPSTAT_INC(udps_fullsock);
387 } else
388 sorwakeup_locked(so);
389 return (0);
390 }
391
/*
 * IPv4 input routine shared by UDP and UDP-Lite.
 *
 * mp    - in/out pointer to the received mbuf chain; *mp is set to NULL
 *         and the chain is consumed on every path.
 * offp  - *offp is read as the length of the IP header in front of the
 *         UDP header.
 * proto - IPPROTO_UDP or IPPROTO_UDPLITE; selects the pcbinfo/PCB list,
 *         the checksum rules and the DTrace probes.
 *
 * After validating the header length, the length field and the checksum,
 * the datagram is delivered via udp_append() either to every matching PCB
 * (multicast or broadcast destination) or to the single PCB found by lookup
 * (unicast, optionally redirected by an IP-forward tag).  If no PCB matches
 * a unicast datagram, an ICMP port unreachable is sent unless the packet
 * carries M_BCAST/M_MCAST, udp_blackhole is set, or the rate limiter
 * refuses.
 *
 * Always returns IPPROTO_DONE.
 */
392 int
udp_input(struct mbuf ** mp,int * offp,int proto)393 udp_input(struct mbuf **mp, int *offp, int proto)
394 {
395 struct ip *ip;
396 struct udphdr *uh;
397 struct ifnet *ifp;
398 struct inpcb *inp;
399 uint16_t len, ip_len;
400 struct inpcbinfo *pcbinfo;
401 struct ip save_ip;
402 struct sockaddr_in udp_in[2];
403 struct mbuf *m;
404 struct m_tag *fwd_tag;
405 int cscov_partial, iphlen;
406
407 m = *mp;
408 iphlen = *offp;
409 ifp = m->m_pkthdr.rcvif;
410 *mp = NULL;
411 UDPSTAT_INC(udps_ipackets);
412
413 /*
414 * Strip IP options, if any; should skip this, make available to
415 * user, and use on returned packets, but we don't yet have a way to
416 * check the checksum with options still present.
417 */
418 if (iphlen > sizeof (struct ip)) {
419 ip_stripoptions(m);
420 iphlen = sizeof(struct ip);
421 }
422
423 /*
424 * Get IP and UDP header together in first mbuf.
425 */
426 if (m->m_len < iphlen + sizeof(struct udphdr)) {
427 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
428 UDPSTAT_INC(udps_hdrops);
429 return (IPPROTO_DONE);
430 }
431 }
432 ip = mtod(m, struct ip *);
433 uh = (struct udphdr *)((caddr_t)ip + iphlen);
/*
 * UDP-Lite starts out assumed to have partial checksum coverage; this is
 * cleared below when the coverage field turns out to span the whole datagram.
 */
434 cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
435
436 /*
437 * Destination port of 0 is illegal, based on RFC768.
438 */
439 if (uh->uh_dport == 0)
440 goto badunlocked;
441
442 /*
443 * Construct sockaddr format source address. Stuff source address
444 * and datagram in user buffer.
 * udp_in[0] holds the source and udp_in[1] the destination; udp_append()
 * reads both.
445 */
446 bzero(&udp_in[0], sizeof(struct sockaddr_in) * 2);
447 udp_in[0].sin_len = sizeof(struct sockaddr_in);
448 udp_in[0].sin_family = AF_INET;
449 udp_in[0].sin_port = uh->uh_sport;
450 udp_in[0].sin_addr = ip->ip_src;
451 udp_in[1].sin_len = sizeof(struct sockaddr_in);
452 udp_in[1].sin_family = AF_INET;
453 udp_in[1].sin_port = uh->uh_dport;
454 udp_in[1].sin_addr = ip->ip_dst;
455
456 /*
457 * Make mbuf data length reflect UDP length. If not enough data to
458 * reflect UDP length, drop.
459 */
460 len = ntohs((u_short)uh->uh_ulen);
461 ip_len = ntohs(ip->ip_len) - iphlen;
462 if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
463 /* Zero means checksum over the complete packet. */
464 if (len == 0)
465 len = ip_len;
466 cscov_partial = 0;
467 }
468 if (ip_len != len) {
469 if (len > ip_len || len < sizeof(struct udphdr)) {
470 UDPSTAT_INC(udps_badlen);
471 goto badunlocked;
472 }
/*
 * len < ip_len here, so the count handed to m_adj() is negative; presumably
 * this trims the bytes beyond the UDP length from the tail -- see mbuf(9).
 * For UDP-Lite the field is the checksum coverage, so nothing is trimmed.
 */
473 if (proto == IPPROTO_UDP)
474 m_adj(m, len - ip_len);
475 }
476
477 /*
478 * Save a copy of the IP header in case we want restore it for
479 * sending an ICMP error message in response.
480 */
481 if (!V_udp_blackhole)
482 save_ip = *ip;
483 else
484 memset(&save_ip, 0, sizeof(save_ip));
485
486 /*
487 * Checksum extended UDP header and data.
488 */
489 if (uh->uh_sum) {
490 u_short uh_sum;
491
492 if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
493 !cscov_partial) {
494 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
495 uh_sum = m->m_pkthdr.csum_data;
496 else
497 uh_sum = in_pseudo(ip->ip_src.s_addr,
498 ip->ip_dst.s_addr, htonl((u_short)len +
499 m->m_pkthdr.csum_data + proto));
/* Flip the bits so that a sum of 0xffff passes the non-zero test below. */
500 uh_sum ^= 0xffff;
501 } else {
502 char b[9];
503
/*
 * Software checksum: temporarily zero the first 9 bytes of the IP header
 * (saved in b) and set ih_len, run in_cksum() over header plus len bytes,
 * then put the saved bytes back.
 */
504 bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
505 bzero(((struct ipovly *)ip)->ih_x1, 9);
506 ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
507 uh->uh_ulen : htons(ip_len);
508 uh_sum = in_cksum(m, len + sizeof (struct ip));
509 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
510 }
511 if (uh_sum) {
512 UDPSTAT_INC(udps_badsum);
513 m_freem(m);
514 return (IPPROTO_DONE);
515 }
516 } else {
517 if (proto == IPPROTO_UDP) {
518 UDPSTAT_INC(udps_nosum);
519 } else {
520 /* UDPLite requires a checksum */
521 /* XXX: What is the right UDPLite MIB counter here? */
522 m_freem(m);
523 return (IPPROTO_DONE);
524 }
525 }
526
527 pcbinfo = udp_get_inpcbinfo(proto);
/*
 * Multicast/broadcast destination: walk the whole PCB list and deliver to
 * every match.  "last" trails one match behind so that earlier matches get
 * a copy and the final match receives the original mbuf.
 */
528 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
529 in_broadcast(ip->ip_dst, ifp)) {
530 struct inpcb *last;
531 struct inpcbhead *pcblist;
532
533 NET_EPOCH_ASSERT();
534
535 pcblist = udp_get_pcblist(proto);
536 last = NULL;
537 CK_LIST_FOREACH(inp, pcblist, inp_list) {
538 if (inp->inp_lport != uh->uh_dport)
539 continue;
540 #ifdef INET6
541 if ((inp->inp_vflag & INP_IPV4) == 0)
542 continue;
543 #endif
544 if (inp->inp_laddr.s_addr != INADDR_ANY &&
545 inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
546 continue;
547 if (inp->inp_faddr.s_addr != INADDR_ANY &&
548 inp->inp_faddr.s_addr != ip->ip_src.s_addr)
549 continue;
550 if (inp->inp_fport != 0 &&
551 inp->inp_fport != uh->uh_sport)
552 continue;
553
554 INP_RLOCK(inp);
555
556 if (__predict_false(inp->inp_flags2 & INP_FREED)) {
557 INP_RUNLOCK(inp);
558 continue;
559 }
560
561 /*
562 * XXXRW: Because we weren't holding either the inpcb
563 * or the hash lock when we checked for a match
564 * before, we should probably recheck now that the
565 * inpcb lock is held.
566 */
567
568 /*
569 * Handle socket delivery policy for any-source
570 * and source-specific multicast. [RFC3678]
571 */
572 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
573 struct ip_moptions *imo;
574 struct sockaddr_in group;
575 int blocked;
576
577 imo = inp->inp_moptions;
578 if (imo == NULL) {
579 INP_RUNLOCK(inp);
580 continue;
581 }
582 bzero(&group, sizeof(struct sockaddr_in));
583 group.sin_len = sizeof(struct sockaddr_in);
584 group.sin_family = AF_INET;
585 group.sin_addr = ip->ip_dst;
586
587 blocked = imo_multi_filter(imo, ifp,
588 (struct sockaddr *)&group,
589 (struct sockaddr *)&udp_in[0]);
590 if (blocked != MCAST_PASS) {
591 if (blocked == MCAST_NOTGMEMBER)
592 IPSTAT_INC(ips_notmember);
593 if (blocked == MCAST_NOTSMEMBER ||
594 blocked == MCAST_MUTED)
595 UDPSTAT_INC(udps_filtermcast);
596 INP_RUNLOCK(inp);
597 continue;
598 }
599 }
/*
 * A previous match exists: give it a copy now.  If the copy cannot be
 * allocated, that PCB is skipped.  A non-zero return from udp_append()
 * means "last" must not be unlocked, so bail out at once.
 */
600 if (last != NULL) {
601 struct mbuf *n;
602
603 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
604 NULL) {
605 if (proto == IPPROTO_UDPLITE)
606 UDPLITE_PROBE(receive, NULL, last, ip,
607 last, uh);
608 else
609 UDP_PROBE(receive, NULL, last, ip, last,
610 uh);
611 if (udp_append(last, ip, n, iphlen,
612 udp_in)) {
613 goto inp_lost;
614 }
615 }
616 INP_RUNLOCK(last);
617 }
618 last = inp;
619 /*
620 * Don't look for additional matches if this one does
621 * not have either the SO_REUSEPORT or SO_REUSEADDR
622 * socket options set. This heuristic avoids
623 * searching through all pcbs in the common case of a
624 * non-shared port. It assumes that an application
625 * will never clear these options after setting them.
626 */
627 if ((last->inp_socket->so_options &
628 (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
629 break;
630 }
631
632 if (last == NULL) {
633 /*
634 * No matching pcb found; discard datagram. (No need
635 * to send an ICMP Port Unreachable for a broadcast
636 * or multicast datagram.)
637 */
638 UDPSTAT_INC(udps_noportbcast);
639 if (inp)
640 INP_RUNLOCK(inp);
641 goto badunlocked;
642 }
/* The final match still holds its read lock and gets the original mbuf. */
643 if (proto == IPPROTO_UDPLITE)
644 UDPLITE_PROBE(receive, NULL, last, ip, last, uh);
645 else
646 UDP_PROBE(receive, NULL, last, ip, last, uh);
647 if (udp_append(last, ip, m, iphlen, udp_in) == 0)
648 INP_RUNLOCK(last);
649 inp_lost:
650 return (IPPROTO_DONE);
651 }
652
653 /*
654 * Locate pcb for datagram.
655 */
656
657 /*
658 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
659 */
660 if ((m->m_flags & M_IP_NEXTHOP) &&
661 (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
662 struct sockaddr_in *next_hop;
663
664 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
665
666 /*
667 * Transparently forwarded. Pretend to be the destination.
668 * Already got one like this?
669 */
670 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
671 ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
672 if (!inp) {
673 /*
674 * It's new. Try to find the ambushing socket.
675 * Because we've rewritten the destination address,
676 * any hardware-generated hash is ignored.
677 */
678 inp = in_pcblookup(pcbinfo, ip->ip_src,
679 uh->uh_sport, next_hop->sin_addr,
680 next_hop->sin_port ? htons(next_hop->sin_port) :
681 uh->uh_dport, INPLOOKUP_WILDCARD |
682 INPLOOKUP_RLOCKPCB, ifp);
683 }
684 /* Remove the tag from the packet. We don't need it anymore. */
685 m_tag_delete(m, fwd_tag);
686 m->m_flags &= ~M_IP_NEXTHOP;
687 } else
688 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
689 ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
690 INPLOOKUP_RLOCKPCB, ifp, m);
/*
 * No listener: optionally log, count, and answer with an ICMP port
 * unreachable unless one of the checks below suppresses it.
 */
691 if (inp == NULL) {
692 if (V_udp_log_in_vain) {
693 char src[INET_ADDRSTRLEN];
694 char dst[INET_ADDRSTRLEN];
695
696 log(LOG_INFO,
697 "Connection attempt to UDP %s:%d from %s:%d\n",
698 inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
699 inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
700 }
701 if (proto == IPPROTO_UDPLITE)
702 UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh);
703 else
704 UDP_PROBE(receive, NULL, NULL, ip, NULL, uh);
705 UDPSTAT_INC(udps_noport);
706 if (m->m_flags & (M_BCAST | M_MCAST)) {
707 UDPSTAT_INC(udps_noportbcast);
708 goto badunlocked;
709 }
710 if (V_udp_blackhole)
711 goto badunlocked;
712 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
713 goto badunlocked;
/* Restore the header saved before checksumming; icmp_error() takes the mbuf. */
714 *ip = save_ip;
715 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
716 return (IPPROTO_DONE);
717 }
718
719 /*
720 * Check the minimum TTL for socket.
721 */
722 INP_RLOCK_ASSERT(inp);
723 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
724 if (proto == IPPROTO_UDPLITE)
725 UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
726 else
727 UDP_PROBE(receive, NULL, inp, ip, inp, uh);
728 INP_RUNLOCK(inp);
729 m_freem(m);
730 return (IPPROTO_DONE);
731 }
/*
 * Partially covered UDP-Lite datagram: drop it when the socket's receive
 * coverage setting (u_rxcslen) is 0 or exceeds the coverage the packet has.
 */
732 if (cscov_partial) {
733 struct udpcb *up;
734
735 up = intoudpcb(inp);
736 if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
737 INP_RUNLOCK(inp);
738 m_freem(m);
739 return (IPPROTO_DONE);
740 }
741 }
742
743 if (proto == IPPROTO_UDPLITE)
744 UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
745 else
746 UDP_PROBE(receive, NULL, inp, ip, inp, uh);
/* Unlock only when udp_append() returns 0; non-zero means do not unlock. */
747 if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
748 INP_RUNLOCK(inp);
749 return (IPPROTO_DONE);
750
/* Common drop path for callers that hold no inp lock: free the mbuf chain. */
751 badunlocked:
752 m_freem(m);
753 return (IPPROTO_DONE);
754 }
755 #endif /* INET */
756
757 /*
758 * Notify a udp user of an asynchronous error; just wake up so that they can
759 * collect error status.
760 */
761 struct inpcb *
udp_notify(struct inpcb * inp,int errno)762 udp_notify(struct inpcb *inp, int errno)
763 {
764
765 INP_WLOCK_ASSERT(inp);
766 if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
767 errno == EHOSTDOWN) && inp->inp_route.ro_nh) {
768 NH_FREE(inp->inp_route.ro_nh);
769 inp->inp_route.ro_nh = (struct nhop_object *)NULL;
770 }
771
772 inp->inp_socket->so_error = errno;
773 sorwakeup(inp->inp_socket);
774 sowwakeup(inp->inp_socket);
775 return (inp);
776 }
777
778 #ifdef INET
779 static void
udp_common_ctlinput(int cmd,struct sockaddr * sa,void * vip,struct inpcbinfo * pcbinfo)780 udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
781 struct inpcbinfo *pcbinfo)
782 {
783 struct ip *ip = vip;
784 struct udphdr *uh;
785 struct in_addr faddr;
786 struct inpcb *inp;
787
788 faddr = ((struct sockaddr_in *)sa)->sin_addr;
789 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
790 return;
791
792 if (PRC_IS_REDIRECT(cmd)) {
793 /* signal EHOSTDOWN, as it flushes the cached route */
794 in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify);
795 return;
796 }
797
798 /*
799 * Hostdead is ugly because it goes linearly through all PCBs.
800 *
801 * XXX: We never get this from ICMP, otherwise it makes an excellent
802 * DoS attack on machines with many connections.
803 */
804 if (cmd == PRC_HOSTDEAD)
805 ip = NULL;
806 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
807 return;
808 if (ip != NULL) {
809 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
810 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
811 ip->ip_src, uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL);
812 if (inp != NULL) {
813 INP_WLOCK_ASSERT(inp);
814 if (inp->inp_socket != NULL) {
815 udp_notify(inp, inetctlerrmap[cmd]);
816 }
817 INP_WUNLOCK(inp);
818 } else {
819 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
820 ip->ip_src, uh->uh_sport,
821 INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
822 if (inp != NULL) {
823 struct udpcb *up;
824 void *ctx;
825 udp_tun_icmp_t func;
826
827 up = intoudpcb(inp);
828 ctx = up->u_tun_ctx;
829 func = up->u_icmp_func;
830 INP_RUNLOCK(inp);
831 if (func != NULL)
832 (*func)(cmd, sa, vip, ctx);
833 }
834 }
835 } else
836 in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
837 udp_notify);
838 }
839 void
udp_ctlinput(int cmd,struct sockaddr * sa,void * vip)840 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
841 {
842
843 return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
844 }
845
846 void
udplite_ctlinput(int cmd,struct sockaddr * sa,void * vip)847 udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
848 {
849
850 return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
851 }
852 #endif /* INET */
853
854 static int
udp_pcblist(SYSCTL_HANDLER_ARGS)855 udp_pcblist(SYSCTL_HANDLER_ARGS)
856 {
857 struct xinpgen xig;
858 struct epoch_tracker et;
859 struct inpcb *inp;
860 int error;
861
862 if (req->newptr != 0)
863 return (EPERM);
864
865 if (req->oldptr == 0) {
866 int n;
867
868 n = V_udbinfo.ipi_count;
869 n += imax(n / 8, 10);
870 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
871 return (0);
872 }
873
874 if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
875 return (error);
876
877 bzero(&xig, sizeof(xig));
878 xig.xig_len = sizeof xig;
879 xig.xig_count = V_udbinfo.ipi_count;
880 xig.xig_gen = V_udbinfo.ipi_gencnt;
881 xig.xig_sogen = so_gencnt;
882 error = SYSCTL_OUT(req, &xig, sizeof xig);
883 if (error)
884 return (error);
885
886 NET_EPOCH_ENTER(et);
887 for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
888 inp != NULL;
889 inp = CK_LIST_NEXT(inp, inp_list)) {
890 INP_RLOCK(inp);
891 if (inp->inp_gencnt <= xig.xig_gen &&
892 cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
893 struct xinpcb xi;
894
895 in_pcbtoxinpcb(inp, &xi);
896 INP_RUNLOCK(inp);
897 error = SYSCTL_OUT(req, &xi, sizeof xi);
898 if (error)
899 break;
900 } else
901 INP_RUNLOCK(inp);
902 }
903 NET_EPOCH_EXIT(et);
904
905 if (!error) {
906 /*
907 * Give the user an updated idea of our state. If the
908 * generation differs from what we told her before, she knows
909 * that something happened while we were processing this
910 * request, and it might be necessary to retry.
911 */
912 xig.xig_gen = V_udbinfo.ipi_gencnt;
913 xig.xig_sogen = so_gencnt;
914 xig.xig_count = V_udbinfo.ipi_count;
915 error = SYSCTL_OUT(req, &xig, sizeof xig);
916 }
917
918 return (error);
919 }
920
921 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
922 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
923 udp_pcblist, "S,xinpcb",
924 "List of active UDP sockets");
925
926 #ifdef INET
927 static int
udp_getcred(SYSCTL_HANDLER_ARGS)928 udp_getcred(SYSCTL_HANDLER_ARGS)
929 {
930 struct xucred xuc;
931 struct sockaddr_in addrs[2];
932 struct epoch_tracker et;
933 struct inpcb *inp;
934 int error;
935
936 error = priv_check(req->td, PRIV_NETINET_GETCRED);
937 if (error)
938 return (error);
939 error = SYSCTL_IN(req, addrs, sizeof(addrs));
940 if (error)
941 return (error);
942 NET_EPOCH_ENTER(et);
943 inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
944 addrs[0].sin_addr, addrs[0].sin_port,
945 INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
946 NET_EPOCH_EXIT(et);
947 if (inp != NULL) {
948 INP_RLOCK_ASSERT(inp);
949 if (inp->inp_socket == NULL)
950 error = ENOENT;
951 if (error == 0)
952 error = cr_canseeinpcb(req->td->td_ucred, inp);
953 if (error == 0)
954 cru2x(inp->inp_cred, &xuc);
955 INP_RUNLOCK(inp);
956 } else
957 error = ENOENT;
958 if (error == 0)
959 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
960 return (error);
961 }
962
963 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
964 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
965 0, 0, udp_getcred, "S,xucred",
966 "Get the xucred of a UDP connection");
967 #endif /* INET */
968
/*
 * Socket option handler for UDP and UDP-Lite sockets.
 *
 * Options whose level is not the socket's own protocol are passed on to
 * ip6_ctloutput() (AF_INET6 sockets) or ip_ctloutput().  At the protocol
 * level the handler knows UDP_ENCAP (IPsec builds with INET only) and, for
 * UDP-Lite sockets only, UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV; anything
 * else yields ENOPROTOOPT.
 *
 * Returns 0 or an errno value.  The inp write lock is taken on entry and
 * released on each path shown here.
 * NOTE(review): the UDP_ENCAP paths do not unlock in this function after a
 * successful IPSEC_ENABLED check; presumably UDPENCAP_PCBCTL() drops the
 * lock -- confirm.
 */
969 int
udp_ctloutput(struct socket * so,struct sockopt * sopt)970 udp_ctloutput(struct socket *so, struct sockopt *sopt)
971 {
972 struct inpcb *inp;
973 struct udpcb *up;
974 int isudplite, error, optval;
975
976 error = 0;
977 isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
978 inp = sotoinpcb(so);
979 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
980 INP_WLOCK(inp);
/* Not a UDP/UDP-Lite level option: unlock and defer to the IP layer. */
981 if (sopt->sopt_level != so->so_proto->pr_protocol) {
982 #ifdef INET6
983 if (INP_CHECK_SOCKAF(so, AF_INET6)) {
984 INP_WUNLOCK(inp);
985 error = ip6_ctloutput(so, sopt);
986 }
987 #endif
988 #if defined(INET) && defined(INET6)
989 else
990 #endif
991 #ifdef INET
992 {
993 INP_WUNLOCK(inp);
994 error = ip_ctloutput(so, sopt);
995 }
996 #endif
997 return (error);
998 }
999
1000 switch (sopt->sopt_dir) {
1001 case SOPT_SET:
1002 switch (sopt->sopt_name) {
1003 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1004 #ifdef INET
1005 case UDP_ENCAP:
1006 if (!IPSEC_ENABLED(ipv4)) {
1007 INP_WUNLOCK(inp);
1008 return (ENOPROTOOPT);
1009 }
1010 error = UDPENCAP_PCBCTL(inp, sopt);
1011 break;
1012 #endif /* INET */
1013 #endif /* IPSEC */
1014 case UDPLITE_SEND_CSCOV:
1015 case UDPLITE_RECV_CSCOV:
1016 if (!isudplite) {
1017 INP_WUNLOCK(inp);
1018 error = ENOPROTOOPT;
1019 break;
1020 }
/*
 * The lock is dropped around sooptcopyin() and re-taken afterwards, which is
 * why inp is fetched from the socket again.  Presumably the copy-in must not
 * run with the inp lock held -- confirm.
 */
1021 INP_WUNLOCK(inp);
1022 error = sooptcopyin(sopt, &optval, sizeof(optval),
1023 sizeof(optval));
1024 if (error != 0)
1025 break;
1026 inp = sotoinpcb(so);
1027 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1028 INP_WLOCK(inp);
1029 up = intoudpcb(inp);
1030 KASSERT(up != NULL, ("%s: up == NULL", __func__));
/* Accepted coverage values are 0 and 8..65535; anything else is EINVAL. */
1031 if ((optval != 0 && optval < 8) || (optval > 65535)) {
1032 INP_WUNLOCK(inp);
1033 error = EINVAL;
1034 break;
1035 }
1036 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1037 up->u_txcslen = optval;
1038 else
1039 up->u_rxcslen = optval;
1040 INP_WUNLOCK(inp);
1041 break;
1042 default:
1043 INP_WUNLOCK(inp);
1044 error = ENOPROTOOPT;
1045 break;
1046 }
1047 break;
1048 case SOPT_GET:
1049 switch (sopt->sopt_name) {
1050 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1051 #ifdef INET
1052 case UDP_ENCAP:
1053 if (!IPSEC_ENABLED(ipv4)) {
1054 INP_WUNLOCK(inp);
1055 return (ENOPROTOOPT);
1056 }
1057 error = UDPENCAP_PCBCTL(inp, sopt);
1058 break;
1059 #endif /* INET */
1060 #endif /* IPSEC */
1061 case UDPLITE_SEND_CSCOV:
1062 case UDPLITE_RECV_CSCOV:
1063 if (!isudplite) {
1064 INP_WUNLOCK(inp);
1065 error = ENOPROTOOPT;
1066 break;
1067 }
1068 up = intoudpcb(inp);
1069 KASSERT(up != NULL, ("%s: up == NULL", __func__));
/* Read the value under the lock, then copy it out after unlocking. */
1070 if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1071 optval = up->u_txcslen;
1072 else
1073 optval = up->u_rxcslen;
1074 INP_WUNLOCK(inp);
1075 error = sooptcopyout(sopt, &optval, sizeof(optval));
1076 break;
1077 default:
1078 INP_WUNLOCK(inp);
1079 error = ENOPROTOOPT;
1080 break;
1081 }
1082 break;
1083 }
1084 return (error);
1085 }
1086
1087 #ifdef INET
1088 #ifdef INET6
1089 /* The logic here is derived from ip6_setpktopt(). See comments there. */
1090 static int
udp_v4mapped_pktinfo(struct cmsghdr * cm,struct sockaddr_in * src,struct inpcb * inp,int flags)1091 udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src,
1092 struct inpcb *inp, int flags)
1093 {
1094 struct ifnet *ifp;
1095 struct in6_pktinfo *pktinfo;
1096 struct in_addr ia;
1097
1098 if ((flags & PRUS_IPV6) == 0)
1099 return (0);
1100
1101 if (cm->cmsg_level != IPPROTO_IPV6)
1102 return (0);
1103
1104 if (cm->cmsg_type != IPV6_2292PKTINFO &&
1105 cm->cmsg_type != IPV6_PKTINFO)
1106 return (0);
1107
1108 if (cm->cmsg_len !=
1109 CMSG_LEN(sizeof(struct in6_pktinfo)))
1110 return (EINVAL);
1111
1112 pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm);
1113 if (!IN6_IS_ADDR_V4MAPPED(&pktinfo->ipi6_addr) &&
1114 !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr))
1115 return (EINVAL);
1116
1117 /* Validate the interface index if specified. */
1118 if (pktinfo->ipi6_ifindex > V_if_index)
1119 return (ENXIO);
1120
1121 ifp = NULL;
1122 if (pktinfo->ipi6_ifindex) {
1123 ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
1124 if (ifp == NULL)
1125 return (ENXIO);
1126 }
1127 if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
1128 ia.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
1129 if (in_ifhasaddr(ifp, ia) == 0)
1130 return (EADDRNOTAVAIL);
1131 }
1132
1133 bzero(src, sizeof(*src));
1134 src->sin_family = AF_INET;
1135 src->sin_len = sizeof(*src);
1136 src->sin_port = inp->inp_lport;
1137 src->sin_addr.s_addr = pktinfo->ipi6_addr.s6_addr32[3];
1138
1139 return (0);
1140 }
1141 #endif
1142
/*
 * Send one datagram from 'inp': parse any control messages, pick the
 * source and destination address/port, prepend a struct udpiphdr to 'm'
 * and pass the packet to ip_output().
 *
 * inp:     sending inpcb; locked here (write or read, see below) and
 *          unlocked again before returning.
 * m:       payload; handed to ip_output() on success, released with
 *          m_freem() on every error path.
 * addr:    destination, cast to struct sockaddr_in; NULL means use the
 *          foreign address already stored in the inpcb.
 * control: optional control-message mbuf; always freed here when non-NULL.
 * td:      calling thread; its credentials are passed to the jail and
 *          bind/connect helpers.
 * flags:   only consulted by udp_v4mapped_pktinfo() (INET6 kernels).
 *
 * Returns 0 or an errno value: EMSGSIZE, EINVAL, ENOPROTOOPT, EISCONN,
 * ENOTCONN, EAGAIN and ENOBUFS are generated here; other values come from
 * the helpers called or from ip_output().
 */
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct thread *td, int flags)
{
	struct udpiphdr *ui;
	int len = m->m_pkthdr.len;
	struct in_addr faddr, laddr;
	struct cmsghdr *cm;
	struct inpcbinfo *pcbinfo;
	struct sockaddr_in *sin, src;
	struct epoch_tracker et;
	int cscov_partial = 0;
	int error = 0;
	int ipflags = 0;
	u_short fport, lport;
	u_char tos;
	uint8_t pr;
	uint16_t cscov = 0;
	uint32_t flowid = 0;
	uint8_t flowtype = M_HASHTYPE_NONE;

	/* Payload plus headers must fit in a single IP packet. */
	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
		if (control)
			m_freem(control);
		m_freem(m);
		return (EMSGSIZE);
	}

	/*
	 * sin_family == 0 marks "no source override requested"; it becomes
	 * AF_INET once a control message supplies a source address.
	 */
	src.sin_family = 0;
	sin = (struct sockaddr_in *)addr;

	/*
	 * udp_output() may need to temporarily bind or connect the current
	 * inpcb.  As such, we don't know up front whether we will need the
	 * pcbinfo lock or not.  Do any work to decide what is needed up
	 * front before acquiring any locks.
	 *
	 * We will need network epoch in either case, to safely lookup into
	 * pcb hash.
	 */
	if (sin == NULL ||
	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0))
		INP_WLOCK(inp);
	else
		INP_RLOCK(inp);
	NET_EPOCH_ENTER(et);
	/* Default TOS comes from the inpcb; an IP_TOS cmsg may replace it. */
	tos = inp->inp_ip_tos;
	if (control != NULL) {
		/*
		 * XXX: Currently, we assume all the optional information is
		 * stored in a single mbuf.
		 */
		if (control->m_next) {
			m_freem(control);
			error = EINVAL;
			goto release;
		}
		/*
		 * Walk the control messages in place: each iteration consumes
		 * one aligned cmsg by advancing m_data and shrinking m_len.
		 * 'cm' is assigned in the body before the step expressions
		 * run.
		 */
		for (; control->m_len > 0;
		    control->m_data += CMSG_ALIGN(cm->cmsg_len),
		    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
			cm = mtod(control, struct cmsghdr *);
			/* Reject truncated, empty or overlong messages. */
			if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
			    || cm->cmsg_len > control->m_len) {
				error = EINVAL;
				break;
			}
#ifdef INET6
			error = udp_v4mapped_pktinfo(cm, &src, inp, flags);
			if (error != 0)
				break;
#endif
			/* Messages for other protocol levels are skipped. */
			if (cm->cmsg_level != IPPROTO_IP)
				continue;

			/*
			 * The 'break's below only leave the switch; the error
			 * test after it is what terminates the loop.
			 */
			switch (cm->cmsg_type) {
			case IP_SENDSRCADDR:
				if (cm->cmsg_len !=
				    CMSG_LEN(sizeof(struct in_addr))) {
					error = EINVAL;
					break;
				}
				bzero(&src, sizeof(src));
				src.sin_family = AF_INET;
				src.sin_len = sizeof(src);
				src.sin_port = inp->inp_lport;
				src.sin_addr =
				    *(struct in_addr *)CMSG_DATA(cm);
				break;

			case IP_TOS:
				if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
					error = EINVAL;
					break;
				}
				tos = *(u_char *)CMSG_DATA(cm);
				break;

			case IP_FLOWID:
				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
					error = EINVAL;
					break;
				}
				flowid = *(uint32_t *) CMSG_DATA(cm);
				break;

			case IP_FLOWTYPE:
				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
					error = EINVAL;
					break;
				}
				/*
				 * NOTE(review): the 32-bit cmsg value is stored
				 * into the 8-bit 'flowtype', so the upper bits
				 * are dropped - confirm this is intended.
				 */
				flowtype = *(uint32_t *) CMSG_DATA(cm);
				break;

#ifdef RSS
			case IP_RSSBUCKETID:
				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
					error = EINVAL;
					break;
				}
				/* This is just a placeholder for now */
				break;
#endif	/* RSS */
			default:
				error = ENOPROTOOPT;
				break;
			}
			if (error)
				break;
		}
		/* The control mbuf is no longer needed, error or not. */
		m_freem(control);
	}
	if (error)
		goto release;

	/* The socket's protocol number selects the pcbinfo to use. */
	pr = inp->inp_socket->so_proto->pr_protocol;
	pcbinfo = udp_get_inpcbinfo(pr);

	/*
	 * If the IP_SENDSRCADDR control message was specified, override the
	 * source address for this datagram.  Its use is invalidated if the
	 * address thus specified is incomplete or clobbers other inpcbs.
	 */
	laddr = inp->inp_laddr;
	lport = inp->inp_lport;
	if (src.sin_family == AF_INET) {
		INP_HASH_LOCK_ASSERT(pcbinfo);
		if ((lport == 0) ||
		    (laddr.s_addr == INADDR_ANY &&
		    src.sin_addr.s_addr == INADDR_ANY)) {
			error = EINVAL;
			goto release;
		}
		/* Only the local copies laddr/lport are updated here. */
		error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
		    &laddr.s_addr, &lport, td->td_ucred);
		if (error)
			goto release;
	}

	/*
	 * If a UDP socket has been connected, then a local address/port will
	 * have been selected and bound.
	 *
	 * If a UDP socket has not been connected to, then an explicit
	 * destination address must be used, in which case a local
	 * address/port may not have been selected and bound.
	 */
	if (sin != NULL) {
		INP_LOCK_ASSERT(inp);
		/* An explicit destination is refused on a connected socket. */
		if (inp->inp_faddr.s_addr != INADDR_ANY) {
			error = EISCONN;
			goto release;
		}

		/*
		 * Jail may rewrite the destination address, so let it do
		 * that before we use it.
		 */
		error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
		if (error)
			goto release;

		/*
		 * If a local address or port hasn't yet been selected, or if
		 * the destination address needs to be rewritten due to using
		 * a special INADDR_ constant, invoke in_pcbconnect_setup()
		 * to do the heavy lifting.  Once a port is selected, we
		 * commit the binding back to the socket; we also commit the
		 * binding of the address if in jail.
		 *
		 * If we already have a valid binding and we're not
		 * requesting a destination address rewrite, use a fast path.
		 */
		if (inp->inp_laddr.s_addr == INADDR_ANY ||
		    inp->inp_lport == 0 ||
		    sin->sin_addr.s_addr == INADDR_ANY ||
		    sin->sin_addr.s_addr == INADDR_BROADCAST) {
			INP_HASH_LOCK_ASSERT(pcbinfo);
			error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
			    &lport, &faddr.s_addr, &fport, NULL,
			    td->td_ucred);
			if (error)
				goto release;

			/*
			 * XXXRW: Why not commit the port if the address is
			 * !INADDR_ANY?
			 */
			/* Commit the local port if newly assigned. */
			if (inp->inp_laddr.s_addr == INADDR_ANY &&
			    inp->inp_lport == 0) {
				INP_WLOCK_ASSERT(inp);
				/*
				 * Remember addr if jailed, to prevent
				 * rebinding.
				 */
				if (prison_flag(td->td_ucred, PR_IP4))
					inp->inp_laddr = laddr;
				inp->inp_lport = lport;
				INP_HASH_WLOCK(pcbinfo);
				error = in_pcbinshash(inp);
				INP_HASH_WUNLOCK(pcbinfo);
				if (error != 0) {
					/* Undo the port commit on failure. */
					inp->inp_lport = 0;
					error = EAGAIN;
					goto release;
				}
				inp->inp_flags |= INP_ANONPORT;
			}
		} else {
			/* Fast path: take the destination as given. */
			faddr = sin->sin_addr;
			fport = sin->sin_port;
		}
	} else {
		INP_LOCK_ASSERT(inp);
		/* No explicit destination: the socket must be connected. */
		faddr = inp->inp_faddr;
		fport = inp->inp_fport;
		if (faddr.s_addr == INADDR_ANY) {
			error = ENOTCONN;
			goto release;
		}
	}

	/*
	 * Calculate data length and get a mbuf for UDP, IP, and possible
	 * link-layer headers.  Immediately slide the data pointer back
	 * forward since we won't use that space at this layer.
	 */
	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto release;
	}
	m->m_data += max_linkhdr;
	m->m_len -= max_linkhdr;
	m->m_pkthdr.len -= max_linkhdr;

	/*
	 * Fill in mbuf with extended UDP header and addresses and length put
	 * into network format.
	 */
	ui = mtod(m, struct udpiphdr *);
	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
	ui->ui_v = IPVERSION << 4;
	ui->ui_pr = pr;
	ui->ui_src = laddr;
	ui->ui_dst = faddr;
	ui->ui_sport = lport;
	ui->ui_dport = fport;
	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
	if (pr == IPPROTO_UDPLITE) {
		struct udpcb *up;
		uint16_t plen;

		up = intoudpcb(inp);
		cscov = up->u_txcslen;
		plen = (u_short)len + sizeof(struct udphdr);
		/* Coverage reaching or exceeding the packet means "all". */
		if (cscov >= plen)
			cscov = 0;
		/* For UDP-Lite, ui_ulen is overwritten with the coverage. */
		ui->ui_len = htons(plen);
		ui->ui_ulen = htons(cscov);
		/*
		 * For UDP-Lite, checksum coverage length of zero means
		 * the entire UDPLite packet is covered by the checksum.
		 */
		cscov_partial = (cscov == 0) ? 0 : 1;
	}

	/*
	 * Set the Don't Fragment bit in the IP header.
	 */
	if (inp->inp_flags & INP_DONTFRAG) {
		struct ip *ip;

		ip = (struct ip *)&ui->ui_i;
		ip->ip_off |= htons(IP_DF);
	}

	/* Map socket and inpcb options onto ip_output() flags. */
	if (inp->inp_socket->so_options & SO_DONTROUTE)
		ipflags |= IP_ROUTETOIF;
	if (inp->inp_socket->so_options & SO_BROADCAST)
		ipflags |= IP_ALLOWBROADCAST;
	if (inp->inp_flags & INP_ONESBCAST)
		ipflags |= IP_SENDONES;

#ifdef MAC
	mac_inpcb_create_mbuf(inp, m);
#endif

	/*
	 * Set up checksum and output datagram.
	 */
	ui->ui_sum = 0;
	if (pr == IPPROTO_UDPLITE) {
		if (inp->inp_flags & INP_ONESBCAST)
			faddr.s_addr = INADDR_BROADCAST;
		/*
		 * UDP-Lite checksums are computed here with in_cksum() over
		 * either the covered prefix or the whole packet; a computed
		 * value of zero is replaced by 0xffff.
		 */
		if (cscov_partial) {
			if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
				ui->ui_sum = 0xffff;
		} else {
			if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
				ui->ui_sum = 0xffff;
		}
	} else if (V_udp_cksum) {
		if (inp->inp_flags & INP_ONESBCAST)
			faddr.s_addr = INADDR_BROADCAST;
		/*
		 * Plain UDP: store only the pseudo-header sum and flag the
		 * mbuf with CSUM_UDP plus the checksum field offset.
		 */
		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
		    htons((u_short)len + sizeof(struct udphdr) + pr));
		m->m_pkthdr.csum_flags = CSUM_UDP;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	}
	/* Finish the IP header fields that overlay the udpiphdr. */
	((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
	((struct ip *)ui)->ip_tos = tos;		/* XXX */
	UDPSTAT_INC(udps_opackets);

	/*
	 * Setup flowid / RSS information for outbound socket.
	 *
	 * Once the UDP code decides to set a flowid some other way,
	 * this allows the flowid to be overridden by userland.
	 */
	if (flowtype != M_HASHTYPE_NONE) {
		m->m_pkthdr.flowid = flowid;
		M_HASHTYPE_SET(m, flowtype);
	}
#if defined(ROUTE_MPATH) || defined(RSS)
	else if (CALC_FLOWID_OUTBOUND_SENDTO) {
		uint32_t hash_val, hash_type;

		hash_val = fib4_calc_packet_hash(laddr, faddr,
		    lport, fport, pr, &hash_type);
		m->m_pkthdr.flowid = hash_val;
		M_HASHTYPE_SET(m, hash_type);
	}

	/*
	 * Don't override with the inp cached flowid value.
	 *
	 * Depending upon the kind of send being done, the inp
	 * flowid/flowtype values may actually not be appropriate
	 * for this particular socket send.
	 *
	 * We should either leave the flowid at zero (which is what is
	 * currently done) or set it to some software generated
	 * hash value based on the packet contents.
	 */
	ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */

	if (pr == IPPROTO_UDPLITE)
		UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
	else
		UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
	/*
	 * The inpcb's route is only passed along when the inpcb is
	 * write-locked; otherwise ip_output() gets no route argument.
	 */
	error = ip_output(m, inp->inp_options,
	    INP_WLOCKED(inp) ? &inp->inp_route : NULL, ipflags,
	    inp->inp_moptions, inp);
	INP_UNLOCK(inp);
	NET_EPOCH_EXIT(et);
	return (error);

release:
	/* Common error exit: drop the lock and epoch, then free the data. */
	INP_UNLOCK(inp);
	NET_EPOCH_EXIT(et);
	m_freem(m);
	return (error);
}
1529
1530 static void
udp_abort(struct socket * so)1531 udp_abort(struct socket *so)
1532 {
1533 struct inpcb *inp;
1534 struct inpcbinfo *pcbinfo;
1535
1536 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1537 inp = sotoinpcb(so);
1538 KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
1539 INP_WLOCK(inp);
1540 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1541 INP_HASH_WLOCK(pcbinfo);
1542 in_pcbdisconnect(inp);
1543 inp->inp_laddr.s_addr = INADDR_ANY;
1544 INP_HASH_WUNLOCK(pcbinfo);
1545 soisdisconnected(so);
1546 }
1547 INP_WUNLOCK(inp);
1548 }
1549
1550 static int
udp_attach(struct socket * so,int proto,struct thread * td)1551 udp_attach(struct socket *so, int proto, struct thread *td)
1552 {
1553 static uint32_t udp_flowid;
1554 struct inpcb *inp;
1555 struct inpcbinfo *pcbinfo;
1556 int error;
1557
1558 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1559 inp = sotoinpcb(so);
1560 KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
1561 error = soreserve(so, udp_sendspace, udp_recvspace);
1562 if (error)
1563 return (error);
1564 INP_INFO_WLOCK(pcbinfo);
1565 error = in_pcballoc(so, pcbinfo);
1566 if (error) {
1567 INP_INFO_WUNLOCK(pcbinfo);
1568 return (error);
1569 }
1570
1571 inp = sotoinpcb(so);
1572 inp->inp_vflag |= INP_IPV4;
1573 inp->inp_ip_ttl = V_ip_defttl;
1574 inp->inp_flowid = atomic_fetchadd_int(&udp_flowid, 1);
1575 inp->inp_flowtype = M_HASHTYPE_OPAQUE;
1576
1577 error = udp_newudpcb(inp);
1578 if (error) {
1579 in_pcbdetach(inp);
1580 in_pcbfree(inp);
1581 INP_INFO_WUNLOCK(pcbinfo);
1582 return (error);
1583 }
1584
1585 INP_WUNLOCK(inp);
1586 INP_INFO_WUNLOCK(pcbinfo);
1587 return (0);
1588 }
1589 #endif /* INET */
1590
1591 int
udp_set_kernel_tunneling(struct socket * so,udp_tun_func_t f,udp_tun_icmp_t i,void * ctx)1592 udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)
1593 {
1594 struct inpcb *inp;
1595 struct udpcb *up;
1596
1597 KASSERT(so->so_type == SOCK_DGRAM,
1598 ("udp_set_kernel_tunneling: !dgram"));
1599 inp = sotoinpcb(so);
1600 KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
1601 INP_WLOCK(inp);
1602 up = intoudpcb(inp);
1603 if ((up->u_tun_func != NULL) ||
1604 (up->u_icmp_func != NULL)) {
1605 INP_WUNLOCK(inp);
1606 return (EBUSY);
1607 }
1608 up->u_tun_func = f;
1609 up->u_icmp_func = i;
1610 up->u_tun_ctx = ctx;
1611 INP_WUNLOCK(inp);
1612 return (0);
1613 }
1614
1615 #ifdef INET
1616 static int
udp_bind(struct socket * so,struct sockaddr * nam,struct thread * td)1617 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1618 {
1619 struct inpcb *inp;
1620 struct inpcbinfo *pcbinfo;
1621 int error;
1622
1623 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1624 inp = sotoinpcb(so);
1625 KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1626 INP_WLOCK(inp);
1627 INP_HASH_WLOCK(pcbinfo);
1628 error = in_pcbbind(inp, nam, td->td_ucred);
1629 INP_HASH_WUNLOCK(pcbinfo);
1630 INP_WUNLOCK(inp);
1631 return (error);
1632 }
1633
1634 static void
udp_close(struct socket * so)1635 udp_close(struct socket *so)
1636 {
1637 struct inpcb *inp;
1638 struct inpcbinfo *pcbinfo;
1639
1640 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1641 inp = sotoinpcb(so);
1642 KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1643 INP_WLOCK(inp);
1644 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1645 INP_HASH_WLOCK(pcbinfo);
1646 in_pcbdisconnect(inp);
1647 inp->inp_laddr.s_addr = INADDR_ANY;
1648 INP_HASH_WUNLOCK(pcbinfo);
1649 soisdisconnected(so);
1650 }
1651 INP_WUNLOCK(inp);
1652 }
1653
1654 static int
udp_connect(struct socket * so,struct sockaddr * nam,struct thread * td)1655 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1656 {
1657 struct epoch_tracker et;
1658 struct inpcb *inp;
1659 struct inpcbinfo *pcbinfo;
1660 struct sockaddr_in *sin;
1661 int error;
1662
1663 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1664 inp = sotoinpcb(so);
1665 KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1666 INP_WLOCK(inp);
1667 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1668 INP_WUNLOCK(inp);
1669 return (EISCONN);
1670 }
1671 sin = (struct sockaddr_in *)nam;
1672 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1673 if (error != 0) {
1674 INP_WUNLOCK(inp);
1675 return (error);
1676 }
1677 NET_EPOCH_ENTER(et);
1678 INP_HASH_WLOCK(pcbinfo);
1679 error = in_pcbconnect(inp, nam, td->td_ucred);
1680 INP_HASH_WUNLOCK(pcbinfo);
1681 NET_EPOCH_EXIT(et);
1682 if (error == 0)
1683 soisconnected(so);
1684 INP_WUNLOCK(inp);
1685 return (error);
1686 }
1687
1688 static void
udp_detach(struct socket * so)1689 udp_detach(struct socket *so)
1690 {
1691 struct inpcb *inp;
1692 struct inpcbinfo *pcbinfo;
1693 struct udpcb *up;
1694
1695 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1696 inp = sotoinpcb(so);
1697 KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1698 KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1699 ("udp_detach: not disconnected"));
1700 INP_INFO_WLOCK(pcbinfo);
1701 INP_WLOCK(inp);
1702 up = intoudpcb(inp);
1703 KASSERT(up != NULL, ("%s: up == NULL", __func__));
1704 inp->inp_ppcb = NULL;
1705 in_pcbdetach(inp);
1706 in_pcbfree(inp);
1707 INP_INFO_WUNLOCK(pcbinfo);
1708 udp_discardcb(up);
1709 }
1710
1711 static int
udp_disconnect(struct socket * so)1712 udp_disconnect(struct socket *so)
1713 {
1714 struct inpcb *inp;
1715 struct inpcbinfo *pcbinfo;
1716
1717 pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1718 inp = sotoinpcb(so);
1719 KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1720 INP_WLOCK(inp);
1721 if (inp->inp_faddr.s_addr == INADDR_ANY) {
1722 INP_WUNLOCK(inp);
1723 return (ENOTCONN);
1724 }
1725 INP_HASH_WLOCK(pcbinfo);
1726 in_pcbdisconnect(inp);
1727 inp->inp_laddr.s_addr = INADDR_ANY;
1728 INP_HASH_WUNLOCK(pcbinfo);
1729 SOCK_LOCK(so);
1730 so->so_state &= ~SS_ISCONNECTED; /* XXX */
1731 SOCK_UNLOCK(so);
1732 INP_WUNLOCK(inp);
1733 return (0);
1734 }
1735
1736 static int
udp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * addr,struct mbuf * control,struct thread * td)1737 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1738 struct mbuf *control, struct thread *td)
1739 {
1740 struct inpcb *inp;
1741
1742 inp = sotoinpcb(so);
1743 KASSERT(inp != NULL, ("udp_send: inp == NULL"));
1744 return (udp_output(inp, m, addr, control, td, flags));
1745 }
1746 #endif /* INET */
1747
1748 int
udp_shutdown(struct socket * so)1749 udp_shutdown(struct socket *so)
1750 {
1751 struct inpcb *inp;
1752
1753 inp = sotoinpcb(so);
1754 KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1755 INP_WLOCK(inp);
1756 socantsendmore(so);
1757 INP_WUNLOCK(inp);
1758 return (0);
1759 }
1760
1761 #ifdef INET
1762 struct pr_usrreqs udp_usrreqs = {
1763 .pru_abort = udp_abort,
1764 .pru_attach = udp_attach,
1765 .pru_bind = udp_bind,
1766 .pru_connect = udp_connect,
1767 .pru_control = in_control,
1768 .pru_detach = udp_detach,
1769 .pru_disconnect = udp_disconnect,
1770 .pru_peeraddr = in_getpeeraddr,
1771 .pru_send = udp_send,
1772 .pru_soreceive = soreceive_dgram,
1773 .pru_sosend = sosend_dgram,
1774 .pru_shutdown = udp_shutdown,
1775 .pru_sockaddr = in_getsockaddr,
1776 .pru_sosetlabel = in_pcbsosetlabel,
1777 .pru_close = udp_close,
1778 };
1779 #endif /* INET */
1780